/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large: the maximum number of 64-bit
 * pointers in a 4k block is 512, so a __u16 is fine for that. Keeping
 * it small saves stack space.
 */
struct metapath {
        struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
        __u16 mp_list[GFS2_MAX_META_HEIGHT];
        int mp_fheight; /* find_metapath height */
        int mp_aheight; /* actual height (lookup height) */
};
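
/*
 * Note: mp_aheight records how much of the path is actually backed by
 * buffers in mp_bh[]. It is at most mp_fheight; the two are equal once
 * the full path down to the requested block has been read in.
 */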

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
                               u64 block, struct page *page)
{
        struct inode *inode = &ip->i_inode;
        struct buffer_head *bh;
        int release = 0;

        if (!page || page->index) {
                page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
                if (!page)
                        return -ENOMEM;
                release = 1;
        }

        if (!PageUptodate(page)) {
                void *kaddr = kmap(page);
                u64 dsize = i_size_read(inode);

                if (dsize > gfs2_max_stuffed_size(ip))
                        dsize = gfs2_max_stuffed_size(ip);

                memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
                memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
                kunmap(page);

                SetPageUptodate(page);
        }

        if (!page_has_buffers(page))
                create_empty_buffers(page, BIT(inode->i_blkbits),
                                     BIT(BH_Uptodate));

        bh = page_buffers(page);

        if (!buffer_mapped(bh))
                map_bh(bh, inode->i_sb, block);

        set_buffer_uptodate(bh);
        if (gfs2_is_jdata(ip))
                gfs2_trans_add_data(ip->i_gl, bh);
        else {
                mark_buffer_dirty(bh);
                gfs2_ordered_add_inode(ip);
        }

        if (release) {
                unlock_page(page);
                put_page(page);
        }

        return 0;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
        struct buffer_head *bh, *dibh;
        struct gfs2_dinode *di;
        u64 block = 0;
        int isdir = gfs2_is_dir(ip);
        int error;

        down_write(&ip->i_rw_mutex);

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                goto out;

        if (i_size_read(&ip->i_inode)) {
                /* Get a free block, fill it with the stuffed data,
                   and write it out to disk */

                unsigned int n = 1;
                error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
                if (error)
                        goto out_brelse;
                if (isdir) {
                        gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
                        error = gfs2_dir_get_new_buffer(ip, block, &bh);
                        if (error)
                                goto out_brelse;
                        gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
                                              dibh, sizeof(struct gfs2_dinode));
                        brelse(bh);
                } else {
                        error = gfs2_unstuffer_page(ip, dibh, block, page);
                        if (error)
                                goto out_brelse;
                }
        }

        /* Set up the pointer to the new block */

        gfs2_trans_add_meta(ip->i_gl, dibh);
        di = (struct gfs2_dinode *)dibh->b_data;
        gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

        if (i_size_read(&ip->i_inode)) {
                *(__be64 *)(di + 1) = cpu_to_be64(block);
                gfs2_add_inode_blocks(&ip->i_inode, 1);
                di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
        }

        ip->i_height = 1;
        di->di_height = cpu_to_be16(1);

out_brelse:
        brelse(dibh);
out:
        up_write(&ip->i_rw_mutex);
        return error;
}


/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 * This routine returns a struct metapath structure that defines a path
 * through the metadata of inode "ip" to get to block "block".
 *
 * Example:
 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
 * filesystem with a blocksize of 4096.
 *
 * find_metapath() would return a struct metapath structure set to:
 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 * That means that in order to get to the block containing the byte at
 * offset 101342453, we would load the indirect block pointed to by pointer
 * 0 in the dinode. We would then load the indirect block pointed to by
 * pointer 48 in that indirect block. We would then load the data block
 * pointed to by pointer 165 in that indirect block.
 *
 *              ----------------------------------------
 *              | Dinode |                              |
 *              |        |                             4|
 *              |        |0 1 2 3 4 5                  9|
 *              |        |                             6|
 *              ----------------------------------------
 *                                 |
 *                                 |
 *                                 V
 *              ----------------------------------------
 *              | Indirect Block                       |
 *              |                                     5|
 *              |            4 4 4 4 4 5 5            1|
 *              |0           5 6 7 8 9 0 1            2|
 *              ----------------------------------------
 *                                 |
 *                                 |
 *                                 V
 *              ----------------------------------------
 *              | Indirect Block                       |
 *              |                 1 1 1 1 1           5|
 *              |                 6 6 6 6 6           1|
 *              |0                3 4 5 6 7           2|
 *              ----------------------------------------
 *                                 |
 *                                 |
 *                                 V
 *              ----------------------------------------
 *              | Data block containing offset         |
 *              |            101342453                 |
 *              |                                      |
 *              |                                      |
 *              ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
                          struct metapath *mp, unsigned int height)
{
        unsigned int i;

        mp->mp_fheight = height;
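        /*
         * do_div() divides "block" in place and returns the remainder, so
         * each pass peels off the pointer index for one level of the tree,
         * filling mp_list[] from the leaf level up towards the dinode.
         */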
        for (i = height; i--;)
                mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}

static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
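        /*
         * When gfs2_iomap_alloc() grows the height of the tree, the new
         * branch normally starts just below the dinode (height 1). If the
         * target lies under pointer 0, the relocated old tree top already
         * covers the first level, so the branch starts at height 2 instead.
         * (This is our reading of how the result is used by the allocator.)
         */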
        if (mp->mp_list[0] == 0)
                return 2;
        return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
        struct buffer_head *bh = mp->mp_bh[height];
        if (height == 0)
                return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
        return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
        __be64 *p = metaptr1(height, mp);
        return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
        const struct buffer_head *bh = mp->mp_bh[height];
        return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
        unsigned int hgt;

        *clone = *mp;
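        /*
         * Take an extra reference on each buffer so that the clone can be
         * released independently of the original via release_metapath().
         */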
        for (hgt = 0; hgt < mp->mp_aheight; hgt++)
                get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
        const __be64 *t;

        for (t = start; t < end; t++) {
                struct buffer_head *rabh;

                if (!*t)
                        continue;

                rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
                if (trylock_buffer(rabh)) {
                        if (!buffer_uptodate(rabh)) {
                                rabh->b_end_io = end_buffer_read_sync;
                                submit_bh(REQ_OP_READ,
                                          REQ_RAHEAD | REQ_META | REQ_PRIO,
                                          rabh);
                                continue;
                        }
                        unlock_buffer(rabh);
                }
                brelse(rabh);
        }
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
                             unsigned int x, unsigned int h)
{
        for (; x < h; x++) {
                __be64 *ptr = metapointer(x, mp);
                u64 dblock = be64_to_cpu(*ptr);
                int ret;

                if (!dblock)
                        break;
                ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
                if (ret)
                        return ret;
        }
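        /*
         * Heights 0..x are now backed by buffers in mp_bh[]; a height index
         * of x corresponds to an actual height of x + 1.
         */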
        mp->mp_aheight = x + 1;
        return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
        return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
        unsigned int x = 0;
        int ret;

        if (h) {
                /* find the first buffer we need to look up. */
                for (x = h - 1; x > 0; x--) {
                        if (mp->mp_bh[x])
                                break;
                }
        }
        ret = __fillup_metapath(ip, mp, x, h);
        if (ret)
                return ret;
        return mp->mp_aheight - x - 1;
}

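/*
 * metapath_to_block() inverts find_metapath(): the indices in mp_list[]
 * are the digits of the logical block number in base sd_inptrs. A sketch
 * of the arithmetic, reusing the example above and assuming 512 pointers
 * per indirect block for simplicity (sd_inptrs is slightly less on a
 * real 4k filesystem, since block headers use some space):
 * mp_list = [0, 48, 165] maps back to logical block
 * 0 * 512^2 + 48 * 512 + 165 = 24741, which with 4k blocks contains
 * byte offset 101342453.
 */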
static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
        sector_t factor = 1, block = 0;
        int hgt;

        for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
                if (hgt < mp->mp_aheight)
                        block += mp->mp_list[hgt] * factor;
                factor *= sdp->sd_inptrs;
        }
        return block;
}

static void release_metapath(struct metapath *mp)
{
        int i;

        for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
                if (mp->mp_bh[i] == NULL)
                        break;
                brelse(mp->mp_bh[i]);
                mp->mp_bh[i] = NULL;
        }
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
        const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
        const __be64 *first = ptr;
        u64 d = be64_to_cpu(*ptr);

        *eob = 0;
        do {
                ptr++;
                if (ptr >= end)
                        break;
                d++;
        } while (be64_to_cpu(*ptr) == d);
        if (ptr >= end)
                *eob = 1;
        return ptr - first;
}

enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
                                                   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
                              u64 max_len, gfs2_metadata_walker walker)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        u64 factor = 1;
        unsigned int hgt;
        int ret;

        /*
         * The walk starts in the lowest allocated indirect block, which may be
         * before the position indicated by @mp. Adjust @max_len accordingly
         * to avoid a short walk.
         */
        for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
                max_len += mp->mp_list[hgt] * factor;
                mp->mp_list[hgt] = 0;
                factor *= sdp->sd_inptrs;
        }
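
        /*
         * From here on, factor is the number of data blocks addressed by a
         * single pointer at height hgt, starting at the lowest allocated
         * level of the tree where the walk begins.
         */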

        for (;;) {
                u16 start = mp->mp_list[hgt];
                enum walker_status status;
                unsigned int ptrs;
                u64 len;

                /* Walk indirect block. */
                ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
                len = ptrs * factor;
                if (len > max_len)
                        ptrs = DIV_ROUND_UP_ULL(max_len, factor);
                status = walker(mp, ptrs);
                switch (status) {
                case WALK_STOP:
                        return 1;
                case WALK_FOLLOW:
                        BUG_ON(mp->mp_aheight == mp->mp_fheight);
                        ptrs = mp->mp_list[hgt] - start;
                        len = ptrs * factor;
                        break;
                case WALK_CONTINUE:
                        break;
                }
                if (len >= max_len)
                        break;
                max_len -= len;
                if (status == WALK_FOLLOW)
                        goto fill_up_metapath;

lower_metapath:
                /* Decrease height of metapath. */
                brelse(mp->mp_bh[hgt]);
                mp->mp_bh[hgt] = NULL;
                mp->mp_list[hgt] = 0;
                if (!hgt)
                        break;
                hgt--;
                factor *= sdp->sd_inptrs;

                /* Advance in metadata tree. */
                (mp->mp_list[hgt])++;
                if (hgt) {
                        if (mp->mp_list[hgt] >= sdp->sd_inptrs)
                                goto lower_metapath;
                } else {
                        if (mp->mp_list[hgt] >= sdp->sd_diptrs)
                                break;
                }

fill_up_metapath:
                /* Increase height of metapath. */
                ret = fillup_metapath(ip, mp, ip->i_height - 1);
                if (ret < 0)
                        return ret;
                hgt += ret;
                for (; ret; ret--)
                        do_div(factor, sdp->sd_inptrs);
                mp->mp_aheight = hgt + 1;
        }
        return 0;
}

static enum walker_status gfs2_hole_walker(struct metapath *mp,
                                           unsigned int ptrs)
{
        const __be64 *start, *ptr, *end;
        unsigned int hgt;

        hgt = mp->mp_aheight - 1;
        start = metapointer(hgt, mp);
        end = start + ptrs;

        for (ptr = start; ptr < end; ptr++) {
                if (*ptr) {
                        mp->mp_list[hgt] += ptr - start;
                        if (mp->mp_aheight == mp->mp_fheight)
                                return WALK_STOP;
                        return WALK_FOLLOW;
                }
        }
        return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
                          struct metapath *mp, struct iomap *iomap)
{
        struct metapath clone;
        u64 hole_size;
        int ret;

        clone_metapath(&clone, mp);
        ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
        if (ret < 0)
                goto out;

        if (ret == 1)
                hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
        else
                hole_size = len;
        iomap->length = hole_size << inode->i_blkbits;
        ret = 0;

out:
        release_metapath(&clone);
        return ret;
}

static inline __be64 *gfs2_indirect_init(struct metapath *mp,
                                         struct gfs2_glock *gl, unsigned int i,
                                         unsigned offset, u64 bn)
{
        __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
                                 ((i > 1) ? sizeof(struct gfs2_meta_header) :
                                            sizeof(struct gfs2_dinode)));
        BUG_ON(i < 1);
        BUG_ON(mp->mp_bh[i] != NULL);
        mp->mp_bh[i] = gfs2_meta_new(gl, bn);
        gfs2_trans_add_meta(gl, mp->mp_bh[i]);
        gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
        gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
        ptr += offset;
        *ptr = cpu_to_be64(bn);
        return ptr;
}

enum alloc_state {
        ALLOC_DATA = 0,
        ALLOC_GROW_DEPTH = 1,
        ALLOC_GROW_HEIGHT = 2,
        /* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
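
/*
 * The allocator below runs these states in decreasing order: a request
 * may first grow the height of the tree (ALLOC_GROW_HEIGHT), then fill
 * in indirect blocks below the new top (ALLOC_GROW_DEPTH), and finally
 * attach the data blocks themselves (ALLOC_DATA). Each switch case in
 * gfs2_iomap_alloc() deliberately falls through to the next state when
 * blocks from the current allocation remain.
 */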

/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @flags: iomap flags
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation, asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call), and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
                            unsigned flags, struct metapath *mp)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *dibh = mp->mp_bh[0];
        u64 bn;
        unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
        size_t dblks = iomap->length >> inode->i_blkbits;
        const unsigned end_of_metadata = mp->mp_fheight - 1;
        int ret;
        enum alloc_state state;
        __be64 *ptr;
        __be64 zero_bn = 0;

        BUG_ON(mp->mp_aheight < 1);
        BUG_ON(dibh == NULL);
        BUG_ON(dblks < 1);

        gfs2_trans_add_meta(ip->i_gl, dibh);

        down_write(&ip->i_rw_mutex);

        if (mp->mp_fheight == mp->mp_aheight) {
                /* Bottom indirect block exists */
                state = ALLOC_DATA;
        } else {
                /* Need to allocate indirect blocks */
                if (mp->mp_fheight == ip->i_height) {
                        /* Writing into existing tree, extend tree down */
                        iblks = mp->mp_fheight - mp->mp_aheight;
                        state = ALLOC_GROW_DEPTH;
                } else {
                        /* Building up tree height */
                        state = ALLOC_GROW_HEIGHT;
                        iblks = mp->mp_fheight - ip->i_height;
                        branch_start = metapath_branch_start(mp);
                        iblks += (mp->mp_fheight - branch_start);
                }
        }

        /* start of the second part of the function (state machine) */

        blks = dblks + iblks;
        i = mp->mp_aheight;
        do {
                n = blks - alloced;
                ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
                if (ret)
                        goto out;
                alloced += n;
                if (state != ALLOC_DATA || gfs2_is_jdata(ip))
                        gfs2_trans_add_unrevoke(sdp, bn, n);
                switch (state) {
                /* Growing height of tree */
                case ALLOC_GROW_HEIGHT:
                        if (i == 1) {
                                ptr = (__be64 *)(dibh->b_data +
                                                 sizeof(struct gfs2_dinode));
                                zero_bn = *ptr;
                        }
                        for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
                             i++, n--)
                                gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
                        if (i - 1 == mp->mp_fheight - ip->i_height) {
                                i--;
                                gfs2_buffer_copy_tail(mp->mp_bh[i],
                                                      sizeof(struct gfs2_meta_header),
                                                      dibh, sizeof(struct gfs2_dinode));
                                gfs2_buffer_clear_tail(dibh,
                                                       sizeof(struct gfs2_dinode) +
                                                       sizeof(__be64));
                                ptr = (__be64 *)(mp->mp_bh[i]->b_data +
                                                 sizeof(struct gfs2_meta_header));
                                *ptr = zero_bn;
                                state = ALLOC_GROW_DEPTH;
                                for (i = branch_start; i < mp->mp_fheight; i++) {
                                        if (mp->mp_bh[i] == NULL)
                                                break;
                                        brelse(mp->mp_bh[i]);
                                        mp->mp_bh[i] = NULL;
                                }
                                i = branch_start;
                        }
                        if (n == 0)
                                break;
                /* Branching from existing tree */
                case ALLOC_GROW_DEPTH:
                        if (i > 1 && i < mp->mp_fheight)
                                gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
                        for (; i < mp->mp_fheight && n > 0; i++, n--)
                                gfs2_indirect_init(mp, ip->i_gl, i,
                                                   mp->mp_list[i-1], bn++);
                        if (i == mp->mp_fheight)
                                state = ALLOC_DATA;
                        if (n == 0)
                                break;
                /* Tree complete, adding data blocks */
                case ALLOC_DATA:
                        BUG_ON(n > dblks);
                        BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
                        gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
                        dblks = n;
                        ptr = metapointer(end_of_metadata, mp);
                        iomap->addr = bn << inode->i_blkbits;
                        iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
                        while (n-- > 0)
                                *ptr++ = cpu_to_be64(bn++);
                        break;
                }
        } while (iomap->addr == IOMAP_NULL_ADDR);

        iomap->type = IOMAP_MAPPED;
        iomap->length = (u64)dblks << inode->i_blkbits;
        ip->i_height = mp->mp_fheight;
        gfs2_add_inode_blocks(&ip->i_inode, alloced);
        gfs2_dinode_out(ip, dibh->b_data);
out:
        up_write(&ip->i_rw_mutex);
        return ret;
}

#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
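
/*
 * IOMAP_F_PRIVATE is repurposed here to flag an extent that ends at the
 * last pointer of an indirect block; gfs2_block_map() translates it into
 * buffer_boundary() so that callers know a metadata read is required
 * before the next block can be mapped.
 */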

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        const __be64 *first, *ptr, *end;

        /*
         * For writes to stuffed files, this function is called twice via
         * gfs2_iomap_get, before and after unstuffing. The size we return the
         * first time needs to be large enough to get the reservation and
         * allocation sizes right. The size we return the second time must
         * be exact or else gfs2_iomap_alloc won't do the right thing.
         */

        if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
                unsigned int maxsize = mp->mp_fheight > 1 ?
                        sdp->sd_inptrs : sdp->sd_diptrs;
                maxsize -= mp->mp_list[mp->mp_fheight - 1];
                if (size > maxsize)
                        size = maxsize;
                return size;
        }

        first = metapointer(ip->i_height - 1, mp);
        end = metaend(ip->i_height - 1, mp);
        if (end - first > size)
                end = first + size;
        for (ptr = first; ptr < end; ptr++) {
                if (*ptr)
                        break;
        }
        return ptr - first;
}

/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
                          unsigned flags, struct iomap *iomap,
                          struct metapath *mp)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t size = i_size_read(inode);
        __be64 *ptr;
        sector_t lblock;
        sector_t lblock_stop;
        int ret;
        int eob;
        u64 len;
        struct buffer_head *dibh = NULL, *bh;
        u8 height;

        if (!length)
                return -EINVAL;

        down_read(&ip->i_rw_mutex);

        ret = gfs2_meta_inode_buffer(ip, &dibh);
        if (ret)
                goto unlock;
        mp->mp_bh[0] = dibh;

        if (gfs2_is_stuffed(ip)) {
                if (flags & IOMAP_WRITE) {
                        loff_t max_size = gfs2_max_stuffed_size(ip);

                        if (pos + length > max_size)
                                goto unstuff;
                        iomap->length = max_size;
                } else {
                        if (pos >= size) {
                                if (flags & IOMAP_REPORT) {
                                        ret = -ENOENT;
                                        goto unlock;
                                } else {
                                        iomap->offset = pos;
                                        iomap->length = length;
                                        goto hole_found;
                                }
                        }
                        iomap->length = size;
                }
                iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
                              sizeof(struct gfs2_dinode);
                iomap->type = IOMAP_INLINE;
                iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
                goto out;
        }

unstuff:
        lblock = pos >> inode->i_blkbits;
        iomap->offset = lblock << inode->i_blkbits;
        lblock_stop = (pos + length - 1) >> inode->i_blkbits;
        len = lblock_stop - lblock + 1;
        iomap->length = len << inode->i_blkbits;

        height = ip->i_height;
        while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
                height++;
        find_metapath(sdp, lblock, mp, height);
        if (height > ip->i_height || gfs2_is_stuffed(ip))
                goto do_alloc;

        ret = lookup_metapath(ip, mp);
        if (ret)
                goto unlock;

        if (mp->mp_aheight != ip->i_height)
                goto do_alloc;

        ptr = metapointer(ip->i_height - 1, mp);
        if (*ptr == 0)
                goto do_alloc;

        bh = mp->mp_bh[ip->i_height - 1];
        len = gfs2_extent_length(bh, ptr, len, &eob);

        iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
        iomap->length = len << inode->i_blkbits;
        iomap->type = IOMAP_MAPPED;
        iomap->flags |= IOMAP_F_MERGED;
        if (eob)
                iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
        iomap->bdev = inode->i_sb->s_bdev;
unlock:
        up_read(&ip->i_rw_mutex);
        return ret;

do_alloc:
        if (flags & IOMAP_REPORT) {
                if (pos >= size)
                        ret = -ENOENT;
                else if (height == ip->i_height)
                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
                else
                        iomap->length = size - pos;
        } else if (flags & IOMAP_WRITE) {
                u64 alloc_size;

                if (flags & IOMAP_DIRECT)
                        goto out; /* (see gfs2_file_direct_write) */

                len = gfs2_alloc_size(inode, mp, len);
                alloc_size = len << inode->i_blkbits;
                if (alloc_size < iomap->length)
                        iomap->length = alloc_size;
        } else {
                if (pos < size && height == ip->i_height)
                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
        }
hole_found:
        iomap->addr = IOMAP_NULL_ADDR;
        iomap->type = IOMAP_HOLE;
        goto out;
}

static int gfs2_write_lock(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int error;

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (error)
                goto out_uninit;
        if (&ip->i_inode == sdp->sd_rindex) {
                struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

                error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
                                           GL_NOCACHE, &m_ip->i_gh);
                if (error)
                        goto out_unlock;
        }
        return 0;

out_unlock:
        gfs2_glock_dq(&ip->i_gh);
out_uninit:
        gfs2_holder_uninit(&ip->i_gh);
        return error;
}

static void gfs2_write_unlock(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);

        if (&ip->i_inode == sdp->sd_rindex) {
                struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

                gfs2_glock_dq_uninit(&m_ip->i_gh);
        }
        gfs2_glock_dq_uninit(&ip->i_gh);
}

static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
                                           unsigned copied, struct page *page,
                                           struct iomap *iomap)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
}

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
                                  loff_t length, unsigned flags,
                                  struct iomap *iomap,
                                  struct metapath *mp)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        bool unstuff, alloc_required;
        int ret;

        ret = gfs2_write_lock(inode);
        if (ret)
                return ret;

        unstuff = gfs2_is_stuffed(ip) &&
                  pos + length > gfs2_max_stuffed_size(ip);

        ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
        if (ret)
                goto out_unlock;

        alloc_required = unstuff || iomap->type == IOMAP_HOLE;

        if (alloc_required || gfs2_is_jdata(ip))
                gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
                                       &ind_blocks);

        if (alloc_required) {
                struct gfs2_alloc_parms ap = {
                        .target = data_blocks + ind_blocks
                };

                ret = gfs2_quota_lock_check(ip, &ap);
                if (ret)
                        goto out_unlock;

                ret = gfs2_inplace_reserve(ip, &ap);
                if (ret)
                        goto out_qunlock;
        }

        rblocks = RES_DINODE + ind_blocks;
        if (gfs2_is_jdata(ip))
                rblocks += data_blocks;
        if (ind_blocks || data_blocks)
                rblocks += RES_STATFS + RES_QUOTA;
        if (inode == sdp->sd_rindex)
                rblocks += 2 * RES_STATFS;
        if (alloc_required)
                rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

        ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
        if (ret)
                goto out_trans_fail;

        if (unstuff) {
                ret = gfs2_unstuff_dinode(ip, NULL);
                if (ret)
                        goto out_trans_end;
                release_metapath(mp);
                ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
                                     flags, iomap, mp);
                if (ret)
                        goto out_trans_end;
        }

        if (iomap->type == IOMAP_HOLE) {
                ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
                if (ret) {
                        gfs2_trans_end(sdp);
                        gfs2_inplace_release(ip);
                        punch_hole(ip, iomap->offset, iomap->length);
                        goto out_qunlock;
                }
        }
        if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
                iomap->page_done = gfs2_iomap_journaled_page_done;
        return 0;

out_trans_end:
        gfs2_trans_end(sdp);
out_trans_fail:
        if (alloc_required)
                gfs2_inplace_release(ip);
out_qunlock:
        if (alloc_required)
                gfs2_quota_unlock(ip);
out_unlock:
        gfs2_write_unlock(inode);
        return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
                            unsigned flags, struct iomap *iomap)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct metapath mp = { .mp_aheight = 1, };
        int ret;

        iomap->flags |= IOMAP_F_BUFFER_HEAD;

        trace_gfs2_iomap_start(ip, pos, length, flags);
        if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
                ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
        } else {
                ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);

                /*
                 * Silently fall back to buffered I/O for stuffed files or if
                 * we've hit a hole (see gfs2_file_direct_write).
                 */
                if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
                    iomap->type != IOMAP_MAPPED)
                        ret = -ENOTBLK;
        }
        if (!ret) {
                get_bh(mp.mp_bh[0]);
                iomap->private = mp.mp_bh[0];
        }
        release_metapath(&mp);
        trace_gfs2_iomap_end(ip, iomap, ret);
        return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                          ssize_t written, unsigned flags, struct iomap *iomap)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_trans *tr = current->journal_info;
        struct buffer_head *dibh = iomap->private;

        if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
                goto out;

        if (iomap->type != IOMAP_INLINE) {
                gfs2_ordered_add_inode(ip);

                if (tr->tr_num_buf_new)
                        __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
                else
                        gfs2_trans_add_meta(ip->i_gl, dibh);
        }

        if (inode == sdp->sd_rindex) {
                adjust_fs_space(inode);
                sdp->sd_rindex_uptodate = 0;
        }

        gfs2_trans_end(sdp);
        gfs2_inplace_release(ip);

        if (length != written && (iomap->flags & IOMAP_F_NEW)) {
                /* Deallocate blocks that were just allocated. */
                loff_t blockmask = i_blocksize(inode) - 1;
                loff_t end = (pos + length) & ~blockmask;

                pos = (pos + written + blockmask) & ~blockmask;
                if (pos < end) {
                        truncate_pagecache_range(inode, pos, end - 1);
                        punch_hole(ip, pos, end - pos);
                }
        }

        if (ip->i_qadata && ip->i_qadata->qa_qd_num)
                gfs2_quota_unlock(ip);
        gfs2_write_unlock(inode);

out:
        if (dibh)
                brelse(dibh);
        return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
        .iomap_begin = gfs2_iomap_begin,
        .iomap_end = gfs2_iomap_end,
};

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to allocate blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
                   struct buffer_head *bh_map, int create)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        loff_t pos = (loff_t)lblock << inode->i_blkbits;
        loff_t length = bh_map->b_size;
        struct metapath mp = { .mp_aheight = 1, };
        struct iomap iomap = { };
        int ret;

        clear_buffer_mapped(bh_map);
        clear_buffer_new(bh_map);
        clear_buffer_boundary(bh_map);
        trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

        if (create) {
                ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
                if (!ret && iomap.type == IOMAP_HOLE)
                        ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
                release_metapath(&mp);
        } else {
                ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
                release_metapath(&mp);
        }
        if (ret)
                goto out;

        if (iomap.length > bh_map->b_size) {
                iomap.length = bh_map->b_size;
                iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
        }
        if (iomap.addr != IOMAP_NULL_ADDR)
                map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
        bh_map->b_size = iomap.length;
        if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
                set_buffer_boundary(bh_map);
        if (iomap.flags & IOMAP_F_NEW)
                set_buffer_new(bh_map);

out:
        trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
        return ret;
}

/*
 * Deprecated: do not use in new code
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
        struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
        int ret;
        int create = *new;

        BUG_ON(!extlen);
        BUG_ON(!dblock);
        BUG_ON(!new);

        bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
        ret = gfs2_block_map(inode, lblock, &bh, create);
        *extlen = bh.b_size >> inode->i_blkbits;
        *dblock = bh.b_blocknr;
        if (buffer_new(&bh))
                *new = 1;
        else
                *new = 0;
        return ret;
}

/**
 * gfs2_block_zero_range - Deal with zeroing out data
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
                                 unsigned int length)
{
        struct address_space *mapping = inode->i_mapping;
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned long index = from >> PAGE_SHIFT;
        unsigned offset = from & (PAGE_SIZE-1);
        unsigned blocksize, iblock, pos;
        struct buffer_head *bh;
        struct page *page;
        int err;

        page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page)
                return 0;

        blocksize = inode->i_sb->s_blocksize;
        iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

        if (!page_has_buffers(page))
                create_empty_buffers(page, blocksize, 0);

        /* Find the buffer that contains "offset" */
        bh = page_buffers(page);
        pos = blocksize;
        while (offset >= pos) {
                bh = bh->b_this_page;
                iblock++;
                pos += blocksize;
        }

        err = 0;

        if (!buffer_mapped(bh)) {
                gfs2_block_map(inode, iblock, bh, 0);
                /* unmapped? It's a hole - nothing to do */
                if (!buffer_mapped(bh))
                        goto unlock;
        }

        /* Ok, it's mapped. Make sure it's up-to-date */
        if (PageUptodate(page))
                set_buffer_uptodate(bh);

        if (!buffer_uptodate(bh)) {
                err = -EIO;
                ll_rw_block(REQ_OP_READ, 0, 1, &bh);
                wait_on_buffer(bh);
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
                err = 0;
        }

        if (gfs2_is_jdata(ip))
                gfs2_trans_add_data(ip->i_gl, bh);
        else
                gfs2_ordered_add_inode(ip);

        zero_user(page, offset, length);
        mark_buffer_dirty(bh);
unlock:
        unlock_page(page);
        put_page(page);
        return err;
}

#define GFS2_JTRUNC_REVOKES 8192
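
/*
 * GFS2_JTRUNC_REVOKES caps the number of revokes per truncate
 * transaction; jdata truncates needing more than this are split into
 * multiple transactions (see gfs2_journaled_truncate below).
 */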

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
        u64 chunk;
        int error;

        while (oldsize != newsize) {
                struct gfs2_trans *tr;
                unsigned int offs;

                chunk = oldsize - newsize;
                if (chunk > max_chunk)
                        chunk = max_chunk;

                offs = oldsize & ~PAGE_MASK;
                if (offs && chunk > PAGE_SIZE)
                        chunk = offs + ((chunk - offs) & PAGE_MASK);

                truncate_pagecache(inode, oldsize - chunk);
                oldsize -= chunk;

                tr = current->journal_info;
                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
                        continue;

                gfs2_trans_end(sdp);
                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
                if (error)
                        return error;
        }

        return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *dibh = NULL;
        int journaled = gfs2_is_jdata(ip);
        u64 oldsize = inode->i_size;
        int error;

        if (journaled)
                error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
        else
                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
        if (error)
                return error;

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                goto out;

        gfs2_trans_add_meta(ip->i_gl, dibh);

        if (gfs2_is_stuffed(ip)) {
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
        } else {
                unsigned int blocksize = i_blocksize(inode);
                unsigned int offs = newsize & (blocksize - 1);
                if (offs) {
                        error = gfs2_block_zero_range(inode, newsize,
                                                      blocksize - offs);
                        if (error)
                                goto out;
                }
                ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
        }

        i_size_write(inode, newsize);
        ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
        gfs2_dinode_out(ip, dibh->b_data);

        if (journaled)
                error = gfs2_journaled_truncate(inode, oldsize, newsize);
        else
                truncate_pagecache(inode, newsize);

out:
        brelse(dibh);
        if (current->journal_info)
                gfs2_trans_end(sdp);
        return error;
}

int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
                         struct iomap *iomap)
{
        struct metapath mp = { .mp_aheight = 1, };
        int ret;

        ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
        if (!ret && iomap->type == IOMAP_HOLE)
                ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
        release_metapath(&mp);
        return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
                              struct buffer_head *bh, __be64 *start, __be64 *end,
                              bool meta, u32 *btotal)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd;
        struct gfs2_trans *tr;
        __be64 *p;
        int blks_outside_rgrp;
        u64 bn, bstart, isize_blks;
        s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
        int ret = 0;
        bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
        rgd = NULL;
        if (gfs2_holder_initialized(rd_gh)) {
                rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
                gfs2_assert_withdraw(sdp,
                                     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
        }
        blks_outside_rgrp = 0;
        bstart = 0;
        blen = 0;

        for (p = start; p < end; p++) {
                if (!*p)
                        continue;
                bn = be64_to_cpu(*p);

                if (rgd) {
                        if (!rgrp_contains_block(rgd, bn)) {
                                blks_outside_rgrp++;
                                continue;
                        }
                } else {
                        rgd = gfs2_blk2rgrpd(sdp, bn, true);
                        if (unlikely(!rgd)) {
                                ret = -EIO;
                                goto out;
                        }
                        ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
                                                 0, rd_gh);
                        if (ret)
                                goto out;

                        /* Must be done with the rgrp glock held: */
                        if (gfs2_rs_active(&ip->i_res) &&
                            rgd == ip->i_res.rs_rbm.rgd)
                                gfs2_rs_deltree(&ip->i_res);
                }

                /* The size of our transactions will be unknown until we
                   actually process all the metadata blocks that relate to
                   the rgrp. So we estimate. We know it can't be more than
                   the dinode's i_blocks and we don't want to exceed the
                   journal flush threshold, sd_log_thresh2. */
                if (current->journal_info == NULL) {
                        unsigned int jblocks_rqsted, revokes;

                        jblocks_rqsted = rgd->rd_length + RES_DINODE +
                                RES_INDIRECT;
                        isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
                        if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
                                jblocks_rqsted +=
                                        atomic_read(&sdp->sd_log_thresh2);
                        else
                                jblocks_rqsted += isize_blks;
                        revokes = jblocks_rqsted;
                        if (meta)
                                revokes += end - start;
                        else if (ip->i_depth)
                                revokes += sdp->sd_inptrs;
                        ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
                        if (ret)
                                goto out_unlock;
                        down_write(&ip->i_rw_mutex);
                }
                /* check if we will exceed the transaction blocks requested */
                tr = current->journal_info;
                if (tr->tr_num_buf_new + RES_STATFS +
                    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
                        /* We set blks_outside_rgrp to ensure the loop will
                           be repeated for the same rgrp, but with a new
                           transaction. */
                        blks_outside_rgrp++;
                        /* This next part is tricky. If the buffer was added
                           to the transaction, we've already set some block
                           pointers to 0, so we better follow through and free
                           them, or we will introduce corruption (so break).
                           This may be impossible, or at least rare, but I
                           decided to cover the case regardless.

                           If the buffer was not added to the transaction
                           (this call), doing so would exceed our transaction
                           size, so we need to end the transaction and start a
                           new one (so goto). */

                        if (buf_in_tr)
                                break;
                        goto out_unlock;
                }

                gfs2_trans_add_meta(ip->i_gl, bh);
                buf_in_tr = true;
                *p = 0;
                if (bstart + blen == bn) {
                        blen++;
                        continue;
                }
                if (bstart) {
                        __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
                        (*btotal) += blen;
                        gfs2_add_inode_blocks(&ip->i_inode, -blen);
                }
                bstart = bn;
                blen = 1;
        }
        if (bstart) {
                __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
                (*btotal) += blen;
                gfs2_add_inode_blocks(&ip->i_inode, -blen);
        }
out_unlock:
        if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
                                            outside the rgrp we just processed,
                                            do it all over again. */
                if (current->journal_info) {
                        struct buffer_head *dibh;

                        ret = gfs2_meta_inode_buffer(ip, &dibh);
                        if (ret)
                                goto out;

                        /* Every transaction boundary, we rewrite the dinode
                           to keep its di_blocks current in case of failure. */
                        ip->i_inode.i_mtime = ip->i_inode.i_ctime =
                                current_time(&ip->i_inode);
                        gfs2_trans_add_meta(ip->i_gl, dibh);
                        gfs2_dinode_out(ip, dibh->b_data);
                        brelse(dibh);
                        up_write(&ip->i_rw_mutex);
                        gfs2_trans_end(sdp);
                        buf_in_tr = false;
                }
                gfs2_glock_dq_uninit(rd_gh);
                cond_resched();
                goto more_rgrps;
        }
out:
        return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
        if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
                return false;
        return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @mp: starting metapath
 * @h: desired height to search
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
                             unsigned int h,
                             __u16 *end_list, unsigned int end_aligned)
{
        struct buffer_head *bh = mp->mp_bh[h];
        __be64 *first, *ptr, *end;

        first = metaptr1(h, mp);
        ptr = first + mp->mp_list[h];
        end = (__be64 *)(bh->b_data + bh->b_size);
        if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
                bool keep_end = h < end_aligned;
                end = first + end_list[h] + keep_end;
        }

        while (ptr < end) {
                if (*ptr) { /* if we have a non-null pointer */
                        mp->mp_list[h] = ptr - first;
                        h++;
                        if (h < GFS2_MAX_META_HEIGHT)
                                mp->mp_list[h] = 0;
                        return true;
                }
                ptr++;
        }
        return false;
}

enum dealloc_states {
        DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
        DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
        DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
        DEALLOC_DONE = 3,       /* process complete */
};
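
/*
 * punch_hole() below drives deallocation with this small state machine:
 * DEALLOC_MP_FULL sweeps the buffer at the current strip height,
 * DEALLOC_MP_LOWER backs up to find the next non-null pointer (or drops
 * the strip height by one), and DEALLOC_FILL_MP reads in buffers so that
 * the metapath reaches the current strip height again.
 */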
1695
1696 static inline void
metapointer_range(struct metapath * mp,int height,__u16 * start_list,unsigned int start_aligned,__u16 * end_list,unsigned int end_aligned,__be64 ** start,__be64 ** end)1697 metapointer_range(struct metapath *mp, int height,
1698 __u16 *start_list, unsigned int start_aligned,
1699 __u16 *end_list, unsigned int end_aligned,
1700 __be64 **start, __be64 **end)
1701 {
1702 struct buffer_head *bh = mp->mp_bh[height];
1703 __be64 *first;
1704
1705 first = metaptr1(height, mp);
1706 *start = first;
1707 if (mp_eq_to_hgt(mp, start_list, height)) {
1708 bool keep_start = height < start_aligned;
1709 *start = first + start_list[height] + keep_start;
1710 }
1711 *end = (__be64 *)(bh->b_data + bh->b_size);
1712 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1713 bool keep_end = height < end_aligned;
1714 *end = first + end_list[height] + keep_end;
1715 }
1716 }
1717
walk_done(struct gfs2_sbd * sdp,struct metapath * mp,int height,__u16 * end_list,unsigned int end_aligned)1718 static inline bool walk_done(struct gfs2_sbd *sdp,
1719 struct metapath *mp, int height,
1720 __u16 *end_list, unsigned int end_aligned)
1721 {
1722 __u16 end;
1723
1724 if (end_list) {
1725 bool keep_end = height < end_aligned;
1726 if (!mp_eq_to_hgt(mp, end_list, height))
1727 return false;
1728 end = end_list[height] + keep_end;
1729 } else
1730 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1731 return mp->mp_list[height] >= end;
1732 }
1733
1734 /**
1735 * punch_hole - deallocate blocks in a file
1736 * @ip: inode to truncate
1737 * @offset: the start of the hole
1738 * @length: the size of the hole (or 0 for truncate)
1739 *
1740 * Punch a hole into a file or truncate a file at a given position. This
1741 * function operates in whole blocks (@offset and @length are rounded
1742 * accordingly); partially filled blocks must be cleared otherwise.
1743 *
1744 * This function works from the bottom up, and from the right to the left. In
1745 * other words, it strips off the highest layer (data) before stripping any of
1746 * the metadata. Doing it this way is best in case the operation is interrupted
1747 * by power failure, etc. The dinode is rewritten in every transaction to
1748 * guarantee integrity.
1749 */
punch_hole(struct gfs2_inode * ip,u64 offset,u64 length)1750 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1751 {
1752 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1753 u64 maxsize = sdp->sd_heightsize[ip->i_height];
1754 struct metapath mp = {};
1755 struct buffer_head *dibh, *bh;
1756 struct gfs2_holder rd_gh;
1757 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1758 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1759 __u16 start_list[GFS2_MAX_META_HEIGHT];
1760 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1761 unsigned int start_aligned, uninitialized_var(end_aligned);
1762 unsigned int strip_h = ip->i_height - 1;
1763 u32 btotal = 0;
1764 int ret, state;
1765 int mp_h; /* metapath buffers are read in to this height */
1766 u64 prev_bnr = 0;
1767 __be64 *start, *end;
1768
1769 if (offset >= maxsize) {
1770 /*
1771 * The starting point lies beyond the allocated meta-data;
1772 * there are no blocks do deallocate.
1773 */
1774 return 0;
1775 }
1776
1777 /*
1778 * The start position of the hole is defined by lblock, start_list, and
1779 * start_aligned. The end position of the hole is defined by lend,
1780 * end_list, and end_aligned.
1781 *
1782 * start_aligned and end_aligned define down to which height the start
1783 * and end positions are aligned to the metadata tree (i.e., the
1784 * position is a multiple of the metadata granularity at the height
1785 * above). This determines at which heights additional meta pointers
1786 * needs to be preserved for the remaining data.
1787 */
1788
1789 if (length) {
1790 u64 end_offset = offset + length;
1791 u64 lend;
1792
1793 /*
1794 * Clip the end at the maximum file size for the given height:
1795 * that's how far the metadata goes; files bigger than that
1796 * will have additional layers of indirection.
1797 */
1798 if (end_offset > maxsize)
1799 end_offset = maxsize;
1800 lend = end_offset >> bsize_shift;
1801
1802 if (lblock >= lend)
1803 return 0;
1804
1805 find_metapath(sdp, lend, &mp, ip->i_height);
1806 end_list = __end_list;
1807 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1808
1809 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1810 if (end_list[mp_h])
1811 break;
1812 }
1813 end_aligned = mp_h;
1814 }
1815
1816 find_metapath(sdp, lblock, &mp, ip->i_height);
1817 memcpy(start_list, mp.mp_list, sizeof(start_list));
1818
1819 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1820 if (start_list[mp_h])
1821 break;
1822 }
1823 start_aligned = mp_h;
1824
1825 ret = gfs2_meta_inode_buffer(ip, &dibh);
1826 if (ret)
1827 return ret;
1828
1829 mp.mp_bh[0] = dibh;
1830 ret = lookup_metapath(ip, &mp);
1831 if (ret)
1832 goto out_metapath;
1833
1834 /* issue read-ahead on metadata */
1835 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1836 metapointer_range(&mp, mp_h, start_list, start_aligned,
1837 end_list, end_aligned, &start, &end);
1838 gfs2_metapath_ra(ip->i_gl, start, end);
1839 }
1840
1841 if (mp.mp_aheight == ip->i_height)
1842 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1843 else
1844 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1845
1846 ret = gfs2_rindex_update(sdp);
1847 if (ret)
1848 goto out_metapath;
1849
1850 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1851 if (ret)
1852 goto out_metapath;
1853 gfs2_holder_mark_uninitialized(&rd_gh);
1854
1855 mp_h = strip_h;
1856
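	/*
	 * Sketch of the state machine below (an editor's summary derived
	 * from the code): DEALLOC_FILL_MP reads metadata buffers in down to
	 * the current strip height; DEALLOC_MP_FULL sweeps the block
	 * pointers at that height; DEALLOC_MP_LOWER releases the current
	 * buffer and advances to the next branch of the tree, lowering
	 * strip_h once a whole level has been stripped; DEALLOC_DONE ends
	 * the loop.
	 */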
1857 while (state != DEALLOC_DONE) {
1858 switch (state) {
1859 /* Truncate a full metapath at the given strip height.
1860 * Note that strip_h == mp_h in order to be in this state. */
1861 case DEALLOC_MP_FULL:
1862 bh = mp.mp_bh[mp_h];
1863 gfs2_assert_withdraw(sdp, bh);
1864 if (gfs2_assert_withdraw(sdp,
1865 prev_bnr != bh->b_blocknr)) {
1866 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1867 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1868 sdp->sd_fsname,
1869 (unsigned long long)ip->i_no_addr,
1870 prev_bnr, ip->i_height, strip_h, mp_h);
1871 }
1872 prev_bnr = bh->b_blocknr;
1873
1874 if (gfs2_metatype_check(sdp, bh,
1875 (mp_h ? GFS2_METATYPE_IN :
1876 GFS2_METATYPE_DI))) {
1877 ret = -EIO;
1878 goto out;
1879 }
1880
1881 /*
1882 * Below, passing end_aligned as 0 gives us the
1883 * metapointer range excluding the end point: the end
1884 * point is the first metapath we must not deallocate!
1885 */
1886
1887 metapointer_range(&mp, mp_h, start_list, start_aligned,
1888 end_list, 0 /* end_aligned */,
1889 &start, &end);
1890 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1891 start, end,
1892 mp_h != ip->i_height - 1,
1893 &btotal);
1894
1895 /* If we hit an error or have just swept the dinode
1896 buffer, exit. */
1897 if (ret || !mp_h) {
1898 state = DEALLOC_DONE;
1899 break;
1900 }
1901 state = DEALLOC_MP_LOWER;
1902 break;
1903
1904 /* lower the metapath strip height */
1905 case DEALLOC_MP_LOWER:
1906 /* We're done with the current buffer, so release it,
1907 unless it's the dinode buffer. Then back up to the
1908 previous pointer. */
1909 if (mp_h) {
1910 brelse(mp.mp_bh[mp_h]);
1911 mp.mp_bh[mp_h] = NULL;
1912 }
1913 /* If we can't get any lower in height, we've stripped
1914 off all we can. Next step is to back up and start
1915 stripping the previous level of metadata. */
1916 if (mp_h == 0) {
1917 strip_h--;
1918 memcpy(mp.mp_list, start_list, sizeof(start_list));
1919 mp_h = strip_h;
1920 state = DEALLOC_FILL_MP;
1921 break;
1922 }
1923 mp.mp_list[mp_h] = 0;
1924 mp_h--; /* search one metadata height down */
1925 mp.mp_list[mp_h]++;
1926 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1927 break;
1928 /* Here we've found a part of the metapath that is not
1929 * allocated. We need to search at that height for the
1930 * next non-null pointer. */
1931 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1932 state = DEALLOC_FILL_MP;
1933 mp_h++;
1934 }
1935 /* Otherwise there are no more non-null pointers at this
1936 height; loop around in this state to back up a level. */
1937 break;
1938
1939 /* Fill the metapath with buffers to the given height. */
1940 case DEALLOC_FILL_MP:
1941 /* Fill the buffers out to the current height. */
1942 ret = fillup_metapath(ip, &mp, mp_h);
1943 if (ret < 0)
1944 goto out;
1945
1946 /* On the first pass, issue read-ahead on metadata. */
1947 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1948 unsigned int height = mp.mp_aheight - 1;
1949
1950 /* No read-ahead for data blocks. */
1951 if (mp.mp_aheight - 1 == strip_h)
1952 height--;
1953
1954 for (; height >= mp.mp_aheight - ret; height--) {
1955 metapointer_range(&mp, height,
1956 start_list, start_aligned,
1957 end_list, end_aligned,
1958 &start, &end);
1959 gfs2_metapath_ra(ip->i_gl, start, end);
1960 }
1961 }
1962
1963 /* If buffers found for the entire strip height */
1964 if (mp.mp_aheight - 1 == strip_h) {
1965 state = DEALLOC_MP_FULL;
1966 break;
1967 }
1968 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1969 mp_h = mp.mp_aheight - 1;
1970
1971 /* If we find a non-null block pointer, crawl a bit
1972 higher up in the metapath and try again, otherwise
1973 we need to look lower for a new starting point. */
1974 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1975 mp_h++;
1976 else
1977 state = DEALLOC_MP_LOWER;
1978 break;
1979 }
1980 }
1981
1982 if (btotal) {
1983 if (current->journal_info == NULL) {
1984 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1985 RES_QUOTA, 0);
1986 if (ret)
1987 goto out;
1988 down_write(&ip->i_rw_mutex);
1989 }
1990 gfs2_statfs_change(sdp, 0, +btotal, 0);
1991 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1992 ip->i_inode.i_gid);
1993 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1994 gfs2_trans_add_meta(ip->i_gl, dibh);
1995 gfs2_dinode_out(ip, dibh->b_data);
1996 up_write(&ip->i_rw_mutex);
1997 gfs2_trans_end(sdp);
1998 }
1999
2000 out:
2001 if (gfs2_holder_initialized(&rd_gh))
2002 gfs2_glock_dq_uninit(&rd_gh);
2003 if (current->journal_info) {
2004 up_write(&ip->i_rw_mutex);
2005 gfs2_trans_end(sdp);
2006 cond_resched();
2007 }
2008 gfs2_quota_unhold(ip);
2009 out_metapath:
2010 release_metapath(&mp);
2011 return ret;
2012 }
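
/*
 * Usage sketch (from the callers in this file): a zero @length means
 * "truncate from @offset to the end of the allocation", as in do_shrink()
 * and gfs2_file_dealloc() below, while a nonzero @length deallocates a
 * range in the middle of the file, as in __gfs2_punch_hole():
 *
 *	error = punch_hole(ip, newsize, 0);	// truncate to newsize
 *	error = punch_hole(ip, offset, length);	// punch a hole
 */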
2013
2014 static int trunc_end(struct gfs2_inode *ip)
2015 {
2016 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2017 struct buffer_head *dibh;
2018 int error;
2019
2020 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2021 if (error)
2022 return error;
2023
2024 down_write(&ip->i_rw_mutex);
2025
2026 error = gfs2_meta_inode_buffer(ip, &dibh);
2027 if (error)
2028 goto out;
2029
2030 if (!i_size_read(&ip->i_inode)) {
2031 ip->i_height = 0;
2032 ip->i_goal = ip->i_no_addr;
2033 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
2034 gfs2_ordered_del_inode(ip);
2035 }
2036 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
2037 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
2038
2039 gfs2_trans_add_meta(ip->i_gl, dibh);
2040 gfs2_dinode_out(ip, dibh->b_data);
2041 brelse(dibh);
2042
2043 out:
2044 up_write(&ip->i_rw_mutex);
2045 gfs2_trans_end(sdp);
2046 return error;
2047 }
2048
2049 /**
2050 * do_shrink - make a file smaller
2051 * @inode: the inode
2052 * @newsize: the size to make the file
2053 *
2054 * Called with an exclusive lock on @inode. @newsize must
2055 * be equal to or smaller than the current inode size.
2056 *
2057 * Returns: errno
2058 */
2059
2060 static int do_shrink(struct inode *inode, u64 newsize)
2061 {
2062 struct gfs2_inode *ip = GFS2_I(inode);
2063 int error;
2064
2065 error = trunc_start(inode, newsize);
2066 if (error < 0)
2067 return error;
2068 if (gfs2_is_stuffed(ip))
2069 return 0;
2070
2071 error = punch_hole(ip, newsize, 0);
2072 if (error == 0)
2073 error = trunc_end(ip);
2074
2075 return error;
2076 }
2077
2078 void gfs2_trim_blocks(struct inode *inode)
2079 {
2080 int ret;
2081
2082 ret = do_shrink(inode, inode->i_size);
2083 WARN_ON(ret != 0);
2084 }
2085
2086 /**
2087 * do_grow - Touch and update inode size
2088 * @inode: The inode
2089 * @size: The new size
2090 *
2091 * This function updates the timestamps on the inode and
2092 * may also increase the size of the inode. This function
2093 * must not be called with @size any smaller than the current
2094 * inode size.
2095 *
2096 * Although it is not strictly required to unstuff files here,
2097 * earlier versions of GFS2 had a bug in the stuffed file reading
2098 * code which would result in a buffer overrun if the size was larger
2099 * than the max stuffed file size. In order to prevent this from
2100 * occurring, such files are unstuffed, but in other cases we can
2101 * just update the inode size directly.
2102 *
2103 * Returns: 0 on success, or -ve on error
2104 */
2105
2106 static int do_grow(struct inode *inode, u64 size)
2107 {
2108 struct gfs2_inode *ip = GFS2_I(inode);
2109 struct gfs2_sbd *sdp = GFS2_SB(inode);
2110 struct gfs2_alloc_parms ap = { .target = 1, };
2111 struct buffer_head *dibh;
2112 int error;
2113 int unstuff = 0;
2114
2115 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
2116 error = gfs2_quota_lock_check(ip, &ap);
2117 if (error)
2118 return error;
2119
2120 error = gfs2_inplace_reserve(ip, &ap);
2121 if (error)
2122 goto do_grow_qunlock;
2123 unstuff = 1;
2124 }
2125
2126 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
2127 (unstuff &&
2128 gfs2_is_jdata(ip) ? RES_JDATA : 0) +
2129 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2130 0 : RES_QUOTA), 0);
2131 if (error)
2132 goto do_grow_release;
2133
2134 if (unstuff) {
2135 error = gfs2_unstuff_dinode(ip, NULL);
2136 if (error)
2137 goto do_end_trans;
2138 }
2139
2140 error = gfs2_meta_inode_buffer(ip, &dibh);
2141 if (error)
2142 goto do_end_trans;
2143
2144 i_size_write(inode, size);
2145 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
2146 gfs2_trans_add_meta(ip->i_gl, dibh);
2147 gfs2_dinode_out(ip, dibh->b_data);
2148 brelse(dibh);
2149
2150 do_end_trans:
2151 gfs2_trans_end(sdp);
2152 do_grow_release:
2153 if (unstuff) {
2154 gfs2_inplace_release(ip);
2155 do_grow_qunlock:
2156 gfs2_quota_unlock(ip);
2157 }
2158 return error;
2159 }
2160
2161 /**
2162 * gfs2_setattr_size - make a file a given size
2163 * @inode: the inode
2164 * @newsize: the size to make the file
2165 *
2166 * The file size can grow, shrink, or stay the same size. This
2167 * is called holding i_rwsem and an exclusive glock on the inode
2168 * in question.
2169 *
2170 * Returns: errno
2171 */
2172
2173 int gfs2_setattr_size(struct inode *inode, u64 newsize)
2174 {
2175 struct gfs2_inode *ip = GFS2_I(inode);
2176 int ret;
2177
2178 BUG_ON(!S_ISREG(inode->i_mode));
2179
2180 ret = inode_newsize_ok(inode, newsize);
2181 if (ret)
2182 return ret;
2183
2184 inode_dio_wait(inode);
2185
2186 ret = gfs2_rsqa_alloc(ip);
2187 if (ret)
2188 goto out;
2189
2190 if (newsize >= inode->i_size) {
2191 ret = do_grow(inode, newsize);
2192 goto out;
2193 }
2194
2195 ret = do_shrink(inode, newsize);
2196 out:
2197 gfs2_rsqa_delete(ip, NULL);
2198 return ret;
2199 }
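
/*
 * Example caller (a sketch; the actual call chain lives outside this
 * file): a truncate(2)/ftruncate(2) on a gfs2 file is expected to reach
 * this function through the setattr path, roughly:
 *
 *	if (attr->ia_valid & ATTR_SIZE)
 *		error = gfs2_setattr_size(inode, attr->ia_size);
 */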
2200
2201 int gfs2_truncatei_resume(struct gfs2_inode *ip)
2202 {
2203 int error;
2204 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
2205 if (!error)
2206 error = trunc_end(ip);
2207 return error;
2208 }
2209
2210 int gfs2_file_dealloc(struct gfs2_inode *ip)
2211 {
2212 return punch_hole(ip, 0, 0);
2213 }
2214
2215 /**
2216 * gfs2_free_journal_extents - Free cached journal bmap info
2217 * @jd: The journal
2218 *
2219 */
2220
2221 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2222 {
2223 struct gfs2_journal_extent *jext;
2224
2225 while (!list_empty(&jd->extent_list)) {
2226 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
2227 list_del(&jext->list);
2228 kfree(jext);
2229 }
2230 }
2231
2232 /**
2233 * gfs2_add_jextent - Add or merge a new extent to extent cache
2234 * @jd: The journal descriptor
2235 * @lblock: The logical block at start of new extent
2236 * @dblock: The physical block at start of new extent
2237 * @blocks: Size of extent in fs blocks
2238 *
2239 * Returns: 0 on success or -ENOMEM
2240 */
2241
2242 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2243 {
2244 struct gfs2_journal_extent *jext;
2245
2246 if (!list_empty(&jd->extent_list)) {
2247 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
2248 if ((jext->dblock + jext->blocks) == dblock) {
2249 jext->blocks += blocks;
2250 return 0;
2251 }
2252 }
2253
2254 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2255 if (jext == NULL)
2256 return -ENOMEM;
2257 jext->dblock = dblock;
2258 jext->lblock = lblock;
2259 jext->blocks = blocks;
2260 list_add_tail(&jext->list, &jd->extent_list);
2261 jd->nr_extents++;
2262 return 0;
2263 }
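
/*
 * Worked example (hypothetical block numbers): adding
 * (lblock 0, dblock 100, 8 blocks) and then (lblock 8, dblock 108,
 * 8 blocks) leaves a single 16-block extent, because the second range
 * starts exactly at jext->dblock + jext->blocks and is merged instead
 * of allocated:
 *
 *	gfs2_add_jextent(jd, 0, 100, 8);
 *	gfs2_add_jextent(jd, 8, 108, 8);	// merged: blocks == 16
 */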
2264
2265 /**
2266 * gfs2_map_journal_extents - Cache journal bmap info
2267 * @sdp: The super block
2268 * @jd: The journal to map
2269 *
2270 * Create a reusable "extent" mapping from all logical
2271 * blocks to all physical blocks for the given journal. This will save
2272 * us time when writing journal blocks. Most journals will have only one
2273 * extent that maps all their logical blocks. That's because mkfs.gfs2
2274 * arranges the journal blocks sequentially to maximize performance,
2275 * so a single extent maps the whole length of the file. However,
2276 * gfs2_jadd can run while the filesystem is active, so journals added
2277 * later may not be sequential. Less likely, but still possible, users
2278 * may have created their own journals by mounting the metafs and
2279 * laying them out by hand; such journals might have
2280 * several extents.
2281 *
2282 * Returns: 0 on success, or error on failure
2283 */
2284
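/*
 * Example (hypothetical sizes): a 128 MiB journal on a 4 KiB-block
 * filesystem laid out sequentially by mkfs.gfs2 maps as one extent of
 * 32768 blocks, so jd->nr_extents ends up as 1 and every journal write
 * can translate a logical block to a disk block with a single addition.
 */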
2285 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2286 {
2287 u64 lblock = 0;
2288 u64 lblock_stop;
2289 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2290 struct buffer_head bh;
2291 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2292 u64 size;
2293 int rc;
2294
2295 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2296 size = (lblock_stop - lblock) << shift;
2297 jd->nr_extents = 0;
2298 WARN_ON(!list_empty(&jd->extent_list));
2299
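	/*
	 * A note on the technique below (editor's summary): the on-stack
	 * buffer_head is used purely as a query structure. gfs2_block_map()
	 * can return a mapping larger than one block by shrinking
	 * bh.b_size, which lets this loop walk the journal in whole
	 * extents rather than block by block.
	 */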
2300 do {
2301 bh.b_state = 0;
2302 bh.b_blocknr = 0;
2303 bh.b_size = size;
2304 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2305 if (rc || !buffer_mapped(&bh))
2306 goto fail;
2307 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2308 if (rc)
2309 goto fail;
2310 size -= bh.b_size;
2311 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2312 } while (size > 0);
2313
2314 fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
2315 jd->nr_extents);
2316 return 0;
2317
2318 fail:
2319 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2320 rc, jd->jd_jid,
2321 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2322 jd->nr_extents);
2323 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2324 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2325 bh.b_state, (unsigned long long)bh.b_size);
2326 gfs2_free_journal_extents(jd);
2327 return rc;
2328 }
2329
2330 /**
2331 * gfs2_write_alloc_required - figure out if a write will require an allocation
2332 * @ip: the file being written to
2333 * @offset: the offset to write to
2334 * @len: the number of bytes being written
2335 *
2336 * Returns: 1 if an alloc is required, 0 otherwise
2337 */
2338
2339 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
2340 unsigned int len)
2341 {
2342 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2343 struct buffer_head bh;
2344 unsigned int shift;
2345 u64 lblock, lblock_stop, size;
2346 u64 end_of_file;
2347
2348 if (!len)
2349 return 0;
2350
2351 if (gfs2_is_stuffed(ip)) {
2352 if (offset + len > gfs2_max_stuffed_size(ip))
2353 return 1;
2354 return 0;
2355 }
2356
2357 shift = sdp->sd_sb.sb_bsize_shift;
2358 BUG_ON(gfs2_is_dir(ip));
2359 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
2360 lblock = offset >> shift;
2361 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2362 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
2363 return 1;
2364
2365 size = (lblock_stop - lblock) << shift;
2366 do {
2367 bh.b_state = 0;
2368 bh.b_size = size;
2369 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2370 if (!buffer_mapped(&bh))
2371 return 1;
2372 size -= bh.b_size;
2373 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2374 } while (size > 0);
2375
2376 return 0;
2377 }
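
/*
 * Usage sketch (an assumed caller pattern, not a verbatim call site):
 * write paths use this check to decide whether a write can proceed
 * without reserving new blocks, e.g.:
 *
 *	if (gfs2_write_alloc_required(ip, pos, len)) {
 *		// take quota locks and reserve blocks before writing
 *	}
 */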
2378
2379 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2380 {
2381 struct gfs2_inode *ip = GFS2_I(inode);
2382 struct buffer_head *dibh;
2383 int error;
2384
2385 if (offset >= inode->i_size)
2386 return 0;
2387 if (offset + length > inode->i_size)
2388 length = inode->i_size - offset;
2389
2390 error = gfs2_meta_inode_buffer(ip, &dibh);
2391 if (error)
2392 return error;
2393 gfs2_trans_add_meta(ip->i_gl, dibh);
2394 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2395 length);
2396 brelse(dibh);
2397 return 0;
2398 }
2399
2400 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2401 loff_t length)
2402 {
2403 struct gfs2_sbd *sdp = GFS2_SB(inode);
2404 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2405 int error;
2406
2407 while (length) {
2408 struct gfs2_trans *tr;
2409 loff_t chunk;
2410 unsigned int offs;
2411
2412 chunk = length;
2413 if (chunk > max_chunk)
2414 chunk = max_chunk;
2415
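		/*
		 * If the range does not start on a page boundary, trim the
		 * chunk so that it ends on one. Hypothetical example with
		 * 4 KiB pages: offset 0x1800 and chunk 0x5000 give
		 * offs == 0x800 and chunk == 0x800 + 0x4000 == 0x4800, so
		 * offset + chunk lands on a page boundary (0x6000).
		 */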
2416 offs = offset & ~PAGE_MASK;
2417 if (offs && chunk > PAGE_SIZE)
2418 chunk = offs + ((chunk - offs) & PAGE_MASK);
2419
2420 truncate_pagecache_range(inode, offset, chunk);
2421 offset += chunk;
2422 length -= chunk;
2423
2424 tr = current->journal_info;
2425 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2426 continue;
2427
2428 gfs2_trans_end(sdp);
2429 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2430 if (error)
2431 return error;
2432 }
2433 return 0;
2434 }
2435
2436 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2437 {
2438 struct inode *inode = file_inode(file);
2439 struct gfs2_inode *ip = GFS2_I(inode);
2440 struct gfs2_sbd *sdp = GFS2_SB(inode);
2441 int error;
2442
2443 if (gfs2_is_jdata(ip))
2444 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2445 GFS2_JTRUNC_REVOKES);
2446 else
2447 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2448 if (error)
2449 return error;
2450
2451 if (gfs2_is_stuffed(ip)) {
2452 error = stuffed_zero_range(inode, offset, length);
2453 if (error)
2454 goto out;
2455 } else {
2456 unsigned int start_off, end_len, blocksize;
2457
2458 blocksize = i_blocksize(inode);
2459 start_off = offset & (blocksize - 1);
2460 end_len = (offset + length) & (blocksize - 1);
2461 if (start_off) {
2462 unsigned int len = length;
2463 if (length > blocksize - start_off)
2464 len = blocksize - start_off;
2465 error = gfs2_block_zero_range(inode, offset, len);
2466 if (error)
2467 goto out;
2468 if (start_off + length < blocksize)
2469 end_len = 0;
2470 }
2471 if (end_len) {
2472 error = gfs2_block_zero_range(inode,
2473 offset + length - end_len, end_len);
2474 if (error)
2475 goto out;
2476 }
2477 }
2478
2479 if (gfs2_is_jdata(ip)) {
2480 BUG_ON(!current->journal_info);
2481 error = gfs2_journaled_truncate_range(inode, offset, length);
if (error)
	goto out;
2482 } else
2483 truncate_pagecache_range(inode, offset, offset + length - 1);
2484
2485 file_update_time(file);
2486 mark_inode_dirty(inode);
2487
2488 if (current->journal_info)
2489 gfs2_trans_end(sdp);
2490
2491 if (!gfs2_is_stuffed(ip))
2492 error = punch_hole(ip, offset, length);
2493
2494 out:
2495 if (current->journal_info)
2496 gfs2_trans_end(sdp);
2497 return error;
2498 }
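
/*
 * Example entry point (a sketch; the fallocate plumbing lives outside
 * this file): a userspace hole punch such as
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *		  4096, 65536);
 *
 * is expected to be routed by the VFS to gfs2's fallocate handler,
 * which calls __gfs2_punch_hole() for the hole-punch case.
 */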
2499