1 /* 2 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19 #include "xfs.h" 20 #include "xfs_fs.h" 21 #include "xfs_types.h" 22 #include "xfs_bit.h" 23 #include "xfs_log.h" 24 #include "xfs_inum.h" 25 #include "xfs_trans.h" 26 #include "xfs_sb.h" 27 #include "xfs_ag.h" 28 #include "xfs_dir2.h" 29 #include "xfs_mount.h" 30 #include "xfs_da_btree.h" 31 #include "xfs_bmap_btree.h" 32 #include "xfs_ialloc_btree.h" 33 #include "xfs_dinode.h" 34 #include "xfs_inode.h" 35 #include "xfs_inode_item.h" 36 #include "xfs_itable.h" 37 #include "xfs_ialloc.h" 38 #include "xfs_alloc.h" 39 #include "xfs_bmap.h" 40 #include "xfs_acl.h" 41 #include "xfs_attr.h" 42 #include "xfs_rw.h" 43 #include "xfs_error.h" 44 #include "xfs_quota.h" 45 #include "xfs_utils.h" 46 #include "xfs_rtalloc.h" 47 #include "xfs_trans_space.h" 48 #include "xfs_log_priv.h" 49 #include "xfs_filestream.h" 50 #include "xfs_vnodeops.h" 51 #include "xfs_trace.h" 52 53 /* 54 * The maximum pathlen is 1024 bytes. Since the minimum file system 55 * blocksize is 512 bytes, we can get a max of 2 extents back from 56 * bmapi. 
57 */ 58 #define SYMLINK_MAPS 2 59 60 STATIC int xfs_readlink_bmap(xfs_inode_t * ip,char * link)61 xfs_readlink_bmap( 62 xfs_inode_t *ip, 63 char *link) 64 { 65 xfs_mount_t *mp = ip->i_mount; 66 int pathlen = ip->i_d.di_size; 67 int nmaps = SYMLINK_MAPS; 68 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 69 xfs_daddr_t d; 70 int byte_cnt; 71 int n; 72 xfs_buf_t *bp; 73 int error = 0; 74 75 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps, 76 0); 77 if (error) 78 goto out; 79 80 for (n = 0; n < nmaps; n++) { 81 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 82 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 83 84 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 85 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); 86 if (!bp) 87 return XFS_ERROR(ENOMEM); 88 error = bp->b_error; 89 if (error) { 90 xfs_buf_ioerror_alert(bp, __func__); 91 xfs_buf_relse(bp); 92 goto out; 93 } 94 if (pathlen < byte_cnt) 95 byte_cnt = pathlen; 96 pathlen -= byte_cnt; 97 98 memcpy(link, bp->b_addr, byte_cnt); 99 xfs_buf_relse(bp); 100 } 101 102 link[ip->i_d.di_size] = '\0'; 103 error = 0; 104 105 out: 106 return error; 107 } 108 109 int xfs_readlink(xfs_inode_t * ip,char * link)110 xfs_readlink( 111 xfs_inode_t *ip, 112 char *link) 113 { 114 xfs_mount_t *mp = ip->i_mount; 115 xfs_fsize_t pathlen; 116 int error = 0; 117 118 trace_xfs_readlink(ip); 119 120 if (XFS_FORCED_SHUTDOWN(mp)) 121 return XFS_ERROR(EIO); 122 123 xfs_ilock(ip, XFS_ILOCK_SHARED); 124 125 pathlen = ip->i_d.di_size; 126 if (!pathlen) 127 goto out; 128 129 if (pathlen < 0 || pathlen > MAXPATHLEN) { 130 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)", 131 __func__, (unsigned long long) ip->i_ino, 132 (long long) pathlen); 133 ASSERT(0); 134 error = XFS_ERROR(EFSCORRUPTED); 135 goto out; 136 } 137 138 139 if (ip->i_df.if_flags & XFS_IFINLINE) { 140 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 141 link[pathlen] = '\0'; 142 } else { 143 error = xfs_readlink_bmap(ip, link); 144 } 145 146 out: 147 
xfs_iunlock(ip, XFS_ILOCK_SHARED); 148 return error; 149 } 150 151 /* 152 * Flags for xfs_free_eofblocks 153 */ 154 #define XFS_FREE_EOF_TRYLOCK (1<<0) 155 156 /* 157 * This is called by xfs_inactive to free any blocks beyond eof 158 * when the link count isn't zero and by xfs_dm_punch_hole() when 159 * punching a hole to EOF. 160 */ 161 STATIC int xfs_free_eofblocks(xfs_mount_t * mp,xfs_inode_t * ip,int flags)162 xfs_free_eofblocks( 163 xfs_mount_t *mp, 164 xfs_inode_t *ip, 165 int flags) 166 { 167 xfs_trans_t *tp; 168 int error; 169 xfs_fileoff_t end_fsb; 170 xfs_fileoff_t last_fsb; 171 xfs_filblks_t map_len; 172 int nimaps; 173 xfs_bmbt_irec_t imap; 174 175 /* 176 * Figure out if there are any blocks beyond the end 177 * of the file. If not, then there is nothing to do. 178 */ 179 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); 180 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 181 if (last_fsb <= end_fsb) 182 return 0; 183 map_len = last_fsb - end_fsb; 184 185 nimaps = 1; 186 xfs_ilock(ip, XFS_ILOCK_SHARED); 187 error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); 188 xfs_iunlock(ip, XFS_ILOCK_SHARED); 189 190 if (!error && (nimaps != 0) && 191 (imap.br_startblock != HOLESTARTBLOCK || 192 ip->i_delayed_blks)) { 193 /* 194 * Attach the dquots to the inode up front. 195 */ 196 error = xfs_qm_dqattach(ip, 0); 197 if (error) 198 return error; 199 200 /* 201 * There are blocks after the end of file. 202 * Free them up now by truncating the file to 203 * its current size. 
204 */ 205 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 206 207 if (flags & XFS_FREE_EOF_TRYLOCK) { 208 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 209 xfs_trans_cancel(tp, 0); 210 return 0; 211 } 212 } else { 213 xfs_ilock(ip, XFS_IOLOCK_EXCL); 214 } 215 216 error = xfs_trans_reserve(tp, 0, 217 XFS_ITRUNCATE_LOG_RES(mp), 218 0, XFS_TRANS_PERM_LOG_RES, 219 XFS_ITRUNCATE_LOG_COUNT); 220 if (error) { 221 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 222 xfs_trans_cancel(tp, 0); 223 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 224 return error; 225 } 226 227 xfs_ilock(ip, XFS_ILOCK_EXCL); 228 xfs_trans_ijoin(tp, ip, 0); 229 230 /* 231 * Do not update the on-disk file size. If we update the 232 * on-disk file size and then the system crashes before the 233 * contents of the file are flushed to disk then the files 234 * may be full of holes (ie NULL files bug). 235 */ 236 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 237 XFS_ISIZE(ip)); 238 if (error) { 239 /* 240 * If we get an error at this point we simply don't 241 * bother truncating the file. 242 */ 243 xfs_trans_cancel(tp, 244 (XFS_TRANS_RELEASE_LOG_RES | 245 XFS_TRANS_ABORT)); 246 } else { 247 error = xfs_trans_commit(tp, 248 XFS_TRANS_RELEASE_LOG_RES); 249 } 250 xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); 251 } 252 return error; 253 } 254 255 /* 256 * Free a symlink that has blocks associated with it. 257 */ 258 STATIC int xfs_inactive_symlink_rmt(xfs_inode_t * ip,xfs_trans_t ** tpp)259 xfs_inactive_symlink_rmt( 260 xfs_inode_t *ip, 261 xfs_trans_t **tpp) 262 { 263 xfs_buf_t *bp; 264 int committed; 265 int done; 266 int error; 267 xfs_fsblock_t first_block; 268 xfs_bmap_free_t free_list; 269 int i; 270 xfs_mount_t *mp; 271 xfs_bmbt_irec_t mval[SYMLINK_MAPS]; 272 int nmaps; 273 xfs_trans_t *ntp; 274 int size; 275 xfs_trans_t *tp; 276 277 tp = *tpp; 278 mp = ip->i_mount; 279 ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); 280 /* 281 * We're freeing a symlink that has some 282 * blocks allocated to it. 
Free the 283 * blocks here. We know that we've got 284 * either 1 or 2 extents and that we can 285 * free them all in one bunmapi call. 286 */ 287 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 288 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 289 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 290 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 291 xfs_trans_cancel(tp, 0); 292 *tpp = NULL; 293 return error; 294 } 295 /* 296 * Lock the inode, fix the size, and join it to the transaction. 297 * Hold it so in the normal path, we still have it locked for 298 * the second transaction. In the error paths we need it 299 * held so the cancel won't rele it, see below. 300 */ 301 xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 302 size = (int)ip->i_d.di_size; 303 ip->i_d.di_size = 0; 304 xfs_trans_ijoin(tp, ip, 0); 305 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 306 /* 307 * Find the block(s) so we can inval and unmap them. 308 */ 309 done = 0; 310 xfs_bmap_init(&free_list, &first_block); 311 nmaps = ARRAY_SIZE(mval); 312 error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size), 313 mval, &nmaps, 0); 314 if (error) 315 goto error0; 316 /* 317 * Invalidate the block(s). 318 */ 319 for (i = 0; i < nmaps; i++) { 320 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 321 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 322 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 323 if (!bp) { 324 error = ENOMEM; 325 goto error1; 326 } 327 xfs_trans_binval(tp, bp); 328 } 329 /* 330 * Unmap the dead block(s) to the free_list. 331 */ 332 if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, 333 &first_block, &free_list, &done))) 334 goto error1; 335 ASSERT(done); 336 /* 337 * Commit the first transaction. This logs the EFI and the inode. 338 */ 339 if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) 340 goto error1; 341 /* 342 * The transaction must have been committed, since there were 343 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. 
344 * The new tp has the extent freeing and EFDs. 345 */ 346 ASSERT(committed); 347 /* 348 * The first xact was committed, so add the inode to the new one. 349 * Mark it dirty so it will be logged and moved forward in the log as 350 * part of every commit. 351 */ 352 xfs_trans_ijoin(tp, ip, 0); 353 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 354 /* 355 * Get a new, empty transaction to return to our caller. 356 */ 357 ntp = xfs_trans_dup(tp); 358 /* 359 * Commit the transaction containing extent freeing and EFDs. 360 * If we get an error on the commit here or on the reserve below, 361 * we need to unlock the inode since the new transaction doesn't 362 * have the inode attached. 363 */ 364 error = xfs_trans_commit(tp, 0); 365 tp = ntp; 366 if (error) { 367 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 368 goto error0; 369 } 370 /* 371 * transaction commit worked ok so we can drop the extra ticket 372 * reference that we gained in xfs_trans_dup() 373 */ 374 xfs_log_ticket_put(tp->t_ticket); 375 376 /* 377 * Remove the memory for extent descriptions (just bookkeeping). 378 */ 379 if (ip->i_df.if_bytes) 380 xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); 381 ASSERT(ip->i_df.if_bytes == 0); 382 /* 383 * Put an itruncate log reservation in the new transaction 384 * for our caller. 385 */ 386 if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 387 XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { 388 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 389 goto error0; 390 } 391 /* 392 * Return with the inode locked but not joined to the transaction. 393 */ 394 *tpp = tp; 395 return 0; 396 397 error1: 398 xfs_bmap_cancel(&free_list); 399 error0: 400 /* 401 * Have to come here with the inode locked and either 402 * (held and in the transaction) or (not in the transaction). 403 * If the inode isn't held then cancel would iput it, but 404 * that's wrong since this is inactive and the vnode ref 405 * count is 0 already. 
406 * Cancel won't do anything to the inode if held, but it still 407 * needs to be locked until the cancel is done, if it was 408 * joined to the transaction. 409 */ 410 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 411 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 412 *tpp = NULL; 413 return error; 414 415 } 416 417 STATIC int xfs_inactive_symlink_local(xfs_inode_t * ip,xfs_trans_t ** tpp)418 xfs_inactive_symlink_local( 419 xfs_inode_t *ip, 420 xfs_trans_t **tpp) 421 { 422 int error; 423 424 ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip)); 425 /* 426 * We're freeing a symlink which fit into 427 * the inode. Just free the memory used 428 * to hold the old symlink. 429 */ 430 error = xfs_trans_reserve(*tpp, 0, 431 XFS_ITRUNCATE_LOG_RES(ip->i_mount), 432 0, XFS_TRANS_PERM_LOG_RES, 433 XFS_ITRUNCATE_LOG_COUNT); 434 435 if (error) { 436 xfs_trans_cancel(*tpp, 0); 437 *tpp = NULL; 438 return error; 439 } 440 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 441 442 /* 443 * Zero length symlinks _can_ exist. 
444 */ 445 if (ip->i_df.if_bytes > 0) { 446 xfs_idata_realloc(ip, 447 -(ip->i_df.if_bytes), 448 XFS_DATA_FORK); 449 ASSERT(ip->i_df.if_bytes == 0); 450 } 451 return 0; 452 } 453 454 STATIC int xfs_inactive_attrs(xfs_inode_t * ip,xfs_trans_t ** tpp)455 xfs_inactive_attrs( 456 xfs_inode_t *ip, 457 xfs_trans_t **tpp) 458 { 459 xfs_trans_t *tp; 460 int error; 461 xfs_mount_t *mp; 462 463 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 464 tp = *tpp; 465 mp = ip->i_mount; 466 ASSERT(ip->i_d.di_forkoff != 0); 467 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 468 xfs_iunlock(ip, XFS_ILOCK_EXCL); 469 if (error) 470 goto error_unlock; 471 472 error = xfs_attr_inactive(ip); 473 if (error) 474 goto error_unlock; 475 476 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 477 error = xfs_trans_reserve(tp, 0, 478 XFS_IFREE_LOG_RES(mp), 479 0, XFS_TRANS_PERM_LOG_RES, 480 XFS_INACTIVE_LOG_COUNT); 481 if (error) 482 goto error_cancel; 483 484 xfs_ilock(ip, XFS_ILOCK_EXCL); 485 xfs_trans_ijoin(tp, ip, 0); 486 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 487 488 ASSERT(ip->i_d.di_anextents == 0); 489 490 *tpp = tp; 491 return 0; 492 493 error_cancel: 494 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 495 xfs_trans_cancel(tp, 0); 496 error_unlock: 497 *tpp = NULL; 498 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 499 return error; 500 } 501 502 int xfs_release(xfs_inode_t * ip)503 xfs_release( 504 xfs_inode_t *ip) 505 { 506 xfs_mount_t *mp = ip->i_mount; 507 int error; 508 509 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0)) 510 return 0; 511 512 /* If this is a read-only mount, don't do this (would generate I/O) */ 513 if (mp->m_flags & XFS_MOUNT_RDONLY) 514 return 0; 515 516 if (!XFS_FORCED_SHUTDOWN(mp)) { 517 int truncated; 518 519 /* 520 * If we are using filestreams, and we have an unlinked 521 * file that we are processing the last close on, then nothing 522 * will be able to reopen and write to this file. 
Purge this 523 * inode from the filestreams cache so that it doesn't delay 524 * teardown of the inode. 525 */ 526 if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) 527 xfs_filestream_deassociate(ip); 528 529 /* 530 * If we previously truncated this file and removed old data 531 * in the process, we want to initiate "early" writeout on 532 * the last close. This is an attempt to combat the notorious 533 * NULL files problem which is particularly noticeable from a 534 * truncate down, buffered (re-)write (delalloc), followed by 535 * a crash. What we are effectively doing here is 536 * significantly reducing the time window where we'd otherwise 537 * be exposed to that problem. 538 */ 539 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 540 if (truncated) { 541 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); 542 if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) 543 xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 544 } 545 } 546 547 if (ip->i_d.di_nlink == 0) 548 return 0; 549 550 if ((S_ISREG(ip->i_d.di_mode) && 551 (VFS_I(ip)->i_size > 0 || 552 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && 553 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 554 (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 555 556 /* 557 * If we can't get the iolock just skip truncating the blocks 558 * past EOF because we could deadlock with the mmap_sem 559 * otherwise. We'll get another chance to drop them once the 560 * last reference to the inode is dropped, so we'll never leak 561 * blocks permanently. 562 * 563 * Further, check if the inode is being opened, written and 564 * closed frequently and we have delayed allocation blocks 565 * outstanding (e.g. streaming writes from the NFS server), 566 * truncating the blocks past EOF will cause fragmentation to 567 * occur. 568 * 569 * In this case don't do the truncation, either, but we have to 570 * be careful how we detect this case. 
Blocks beyond EOF show 571 * up as i_delayed_blks even when the inode is clean, so we 572 * need to truncate them away first before checking for a dirty 573 * release. Hence on the first dirty close we will still remove 574 * the speculative allocation, but after that we will leave it 575 * in place. 576 */ 577 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) 578 return 0; 579 580 error = xfs_free_eofblocks(mp, ip, 581 XFS_FREE_EOF_TRYLOCK); 582 if (error) 583 return error; 584 585 /* delalloc blocks after truncation means it really is dirty */ 586 if (ip->i_delayed_blks) 587 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); 588 } 589 return 0; 590 } 591 592 /* 593 * xfs_inactive 594 * 595 * This is called when the vnode reference count for the vnode 596 * goes to zero. If the file has been unlinked, then it must 597 * now be truncated. Also, we clear all of the read-ahead state 598 * kept for the inode here since the file is now closed. 599 */ 600 int xfs_inactive(xfs_inode_t * ip)601 xfs_inactive( 602 xfs_inode_t *ip) 603 { 604 xfs_bmap_free_t free_list; 605 xfs_fsblock_t first_block; 606 int committed; 607 xfs_trans_t *tp; 608 xfs_mount_t *mp; 609 int error; 610 int truncate; 611 612 /* 613 * If the inode is already free, then there can be nothing 614 * to clean up here. 615 */ 616 if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { 617 ASSERT(ip->i_df.if_real_bytes == 0); 618 ASSERT(ip->i_df.if_broot_bytes == 0); 619 return VN_INACTIVE_CACHE; 620 } 621 622 /* 623 * Only do a truncate if it's a regular file with 624 * some actual space in it. It's OK to look at the 625 * inode's fields without the lock because we're the 626 * only one with a reference to the inode. 
627 */ 628 truncate = ((ip->i_d.di_nlink == 0) && 629 ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 || 630 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && 631 S_ISREG(ip->i_d.di_mode)); 632 633 mp = ip->i_mount; 634 635 error = 0; 636 637 /* If this is a read-only mount, don't do this (would generate I/O) */ 638 if (mp->m_flags & XFS_MOUNT_RDONLY) 639 goto out; 640 641 if (ip->i_d.di_nlink != 0) { 642 if ((S_ISREG(ip->i_d.di_mode) && 643 (VFS_I(ip)->i_size > 0 || 644 (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && 645 (ip->i_df.if_flags & XFS_IFEXTENTS) && 646 (!(ip->i_d.di_flags & 647 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 648 ip->i_delayed_blks != 0))) { 649 error = xfs_free_eofblocks(mp, ip, 0); 650 if (error) 651 return VN_INACTIVE_CACHE; 652 } 653 goto out; 654 } 655 656 ASSERT(ip->i_d.di_nlink == 0); 657 658 error = xfs_qm_dqattach(ip, 0); 659 if (error) 660 return VN_INACTIVE_CACHE; 661 662 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 663 if (truncate) { 664 xfs_ilock(ip, XFS_IOLOCK_EXCL); 665 666 error = xfs_trans_reserve(tp, 0, 667 XFS_ITRUNCATE_LOG_RES(mp), 668 0, XFS_TRANS_PERM_LOG_RES, 669 XFS_ITRUNCATE_LOG_COUNT); 670 if (error) { 671 /* Don't call itruncate_cleanup */ 672 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 673 xfs_trans_cancel(tp, 0); 674 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 675 return VN_INACTIVE_CACHE; 676 } 677 678 xfs_ilock(ip, XFS_ILOCK_EXCL); 679 xfs_trans_ijoin(tp, ip, 0); 680 681 ip->i_d.di_size = 0; 682 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 683 684 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); 685 if (error) { 686 xfs_trans_cancel(tp, 687 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 688 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 689 return VN_INACTIVE_CACHE; 690 } 691 692 ASSERT(ip->i_d.di_nextents == 0); 693 } else if (S_ISLNK(ip->i_d.di_mode)) { 694 695 /* 696 * If we get an error while cleaning up a 697 * symlink we bail out. 698 */ 699 error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ? 
700 xfs_inactive_symlink_rmt(ip, &tp) : 701 xfs_inactive_symlink_local(ip, &tp); 702 703 if (error) { 704 ASSERT(tp == NULL); 705 return VN_INACTIVE_CACHE; 706 } 707 708 xfs_trans_ijoin(tp, ip, 0); 709 } else { 710 error = xfs_trans_reserve(tp, 0, 711 XFS_IFREE_LOG_RES(mp), 712 0, XFS_TRANS_PERM_LOG_RES, 713 XFS_INACTIVE_LOG_COUNT); 714 if (error) { 715 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 716 xfs_trans_cancel(tp, 0); 717 return VN_INACTIVE_CACHE; 718 } 719 720 xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 721 xfs_trans_ijoin(tp, ip, 0); 722 } 723 724 /* 725 * If there are attributes associated with the file 726 * then blow them away now. The code calls a routine 727 * that recursively deconstructs the attribute fork. 728 * We need to just commit the current transaction 729 * because we can't use it for xfs_attr_inactive(). 730 */ 731 if (ip->i_d.di_anextents > 0) { 732 error = xfs_inactive_attrs(ip, &tp); 733 /* 734 * If we got an error, the transaction is already 735 * cancelled, and the inode is unlocked. Just get out. 736 */ 737 if (error) 738 return VN_INACTIVE_CACHE; 739 } else if (ip->i_afp) { 740 xfs_idestroy_fork(ip, XFS_ATTR_FORK); 741 } 742 743 /* 744 * Free the inode. 745 */ 746 xfs_bmap_init(&free_list, &first_block); 747 error = xfs_ifree(tp, ip, &free_list); 748 if (error) { 749 /* 750 * If we fail to free the inode, shut down. The cancel 751 * might do that, we need to make sure. Otherwise the 752 * inode might be lost for a long time or forever. 753 */ 754 if (!XFS_FORCED_SHUTDOWN(mp)) { 755 xfs_notice(mp, "%s: xfs_ifree returned error %d", 756 __func__, error); 757 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 758 } 759 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 760 } else { 761 /* 762 * Credit the quota account(s). The inode is gone. 763 */ 764 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); 765 766 /* 767 * Just ignore errors at this point. There is nothing we can 768 * do except to try to keep going. 
Make sure it's not a silent 769 * error. 770 */ 771 error = xfs_bmap_finish(&tp, &free_list, &committed); 772 if (error) 773 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 774 __func__, error); 775 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 776 if (error) 777 xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 778 __func__, error); 779 } 780 781 /* 782 * Release the dquots held by inode, if any. 783 */ 784 xfs_qm_dqdetach(ip); 785 xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 786 787 out: 788 return VN_INACTIVE_CACHE; 789 } 790 791 /* 792 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match 793 * is allowed, otherwise it has to be an exact match. If a CI match is found, 794 * ci_name->name will point to a the actual name (caller must free) or 795 * will be set to NULL if an exact match is found. 796 */ 797 int xfs_lookup(xfs_inode_t * dp,struct xfs_name * name,xfs_inode_t ** ipp,struct xfs_name * ci_name)798 xfs_lookup( 799 xfs_inode_t *dp, 800 struct xfs_name *name, 801 xfs_inode_t **ipp, 802 struct xfs_name *ci_name) 803 { 804 xfs_ino_t inum; 805 int error; 806 uint lock_mode; 807 808 trace_xfs_lookup(dp, name); 809 810 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 811 return XFS_ERROR(EIO); 812 813 lock_mode = xfs_ilock_map_shared(dp); 814 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 815 xfs_iunlock_map_shared(dp, lock_mode); 816 817 if (error) 818 goto out; 819 820 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); 821 if (error) 822 goto out_free_name; 823 824 return 0; 825 826 out_free_name: 827 if (ci_name) 828 kmem_free(ci_name->name); 829 out: 830 *ipp = NULL; 831 return error; 832 } 833 834 int xfs_create(xfs_inode_t * dp,struct xfs_name * name,umode_t mode,xfs_dev_t rdev,xfs_inode_t ** ipp)835 xfs_create( 836 xfs_inode_t *dp, 837 struct xfs_name *name, 838 umode_t mode, 839 xfs_dev_t rdev, 840 xfs_inode_t **ipp) 841 { 842 int is_dir = S_ISDIR(mode); 843 struct xfs_mount *mp = dp->i_mount; 844 
struct xfs_inode *ip = NULL; 845 struct xfs_trans *tp = NULL; 846 int error; 847 xfs_bmap_free_t free_list; 848 xfs_fsblock_t first_block; 849 boolean_t unlock_dp_on_error = B_FALSE; 850 uint cancel_flags; 851 int committed; 852 prid_t prid; 853 struct xfs_dquot *udqp = NULL; 854 struct xfs_dquot *gdqp = NULL; 855 uint resblks; 856 uint log_res; 857 uint log_count; 858 859 trace_xfs_create(dp, name); 860 861 if (XFS_FORCED_SHUTDOWN(mp)) 862 return XFS_ERROR(EIO); 863 864 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 865 prid = xfs_get_projid(dp); 866 else 867 prid = XFS_PROJID_DEFAULT; 868 869 /* 870 * Make sure that we have allocated dquot(s) on disk. 871 */ 872 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 873 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 874 if (error) 875 return error; 876 877 if (is_dir) { 878 rdev = 0; 879 resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 880 log_res = XFS_MKDIR_LOG_RES(mp); 881 log_count = XFS_MKDIR_LOG_COUNT; 882 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); 883 } else { 884 resblks = XFS_CREATE_SPACE_RES(mp, name->len); 885 log_res = XFS_CREATE_LOG_RES(mp); 886 log_count = XFS_CREATE_LOG_COUNT; 887 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 888 } 889 890 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 891 892 /* 893 * Initially assume that the file does not exist and 894 * reserve the resources for that case. If that is not 895 * the case we'll drop the one we have and get a more 896 * appropriate transaction later. 
897 */ 898 error = xfs_trans_reserve(tp, resblks, log_res, 0, 899 XFS_TRANS_PERM_LOG_RES, log_count); 900 if (error == ENOSPC) { 901 /* flush outstanding delalloc blocks and retry */ 902 xfs_flush_inodes(dp); 903 error = xfs_trans_reserve(tp, resblks, log_res, 0, 904 XFS_TRANS_PERM_LOG_RES, log_count); 905 } 906 if (error == ENOSPC) { 907 /* No space at all so try a "no-allocation" reservation */ 908 resblks = 0; 909 error = xfs_trans_reserve(tp, 0, log_res, 0, 910 XFS_TRANS_PERM_LOG_RES, log_count); 911 } 912 if (error) { 913 cancel_flags = 0; 914 goto out_trans_cancel; 915 } 916 917 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 918 unlock_dp_on_error = B_TRUE; 919 920 xfs_bmap_init(&free_list, &first_block); 921 922 /* 923 * Reserve disk quota and the inode. 924 */ 925 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); 926 if (error) 927 goto out_trans_cancel; 928 929 error = xfs_dir_canenter(tp, dp, name, resblks); 930 if (error) 931 goto out_trans_cancel; 932 933 /* 934 * A newly created regular or special file just has one directory 935 * entry pointing to them, but a directory also the "." entry 936 * pointing to itself. 937 */ 938 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 939 prid, resblks > 0, &ip, &committed); 940 if (error) { 941 if (error == ENOSPC) 942 goto out_trans_cancel; 943 goto out_trans_abort; 944 } 945 946 /* 947 * Now we join the directory inode to the transaction. We do not do it 948 * earlier because xfs_dir_ialloc might commit the previous transaction 949 * (and release all the locks). An error from here on will result in 950 * the transaction cancel unlocking dp so don't do it explicitly in the 951 * error path. 952 */ 953 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 954 unlock_dp_on_error = B_FALSE; 955 956 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 957 &first_block, &free_list, resblks ? 
958 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 959 if (error) { 960 ASSERT(error != ENOSPC); 961 goto out_trans_abort; 962 } 963 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 964 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 965 966 if (is_dir) { 967 error = xfs_dir_init(tp, ip, dp); 968 if (error) 969 goto out_bmap_cancel; 970 971 error = xfs_bumplink(tp, dp); 972 if (error) 973 goto out_bmap_cancel; 974 } 975 976 /* 977 * If this is a synchronous mount, make sure that the 978 * create transaction goes to disk before returning to 979 * the user. 980 */ 981 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 982 xfs_trans_set_sync(tp); 983 984 /* 985 * Attach the dquot(s) to the inodes and modify them incore. 986 * These ids of the inode couldn't have changed since the new 987 * inode has been locked ever since it was created. 988 */ 989 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 990 991 error = xfs_bmap_finish(&tp, &free_list, &committed); 992 if (error) 993 goto out_bmap_cancel; 994 995 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 996 if (error) 997 goto out_release_inode; 998 999 xfs_qm_dqrele(udqp); 1000 xfs_qm_dqrele(gdqp); 1001 1002 *ipp = ip; 1003 return 0; 1004 1005 out_bmap_cancel: 1006 xfs_bmap_cancel(&free_list); 1007 out_trans_abort: 1008 cancel_flags |= XFS_TRANS_ABORT; 1009 out_trans_cancel: 1010 xfs_trans_cancel(tp, cancel_flags); 1011 out_release_inode: 1012 /* 1013 * Wait until after the current transaction is aborted to 1014 * release the inode. This prevents recursive transactions 1015 * and deadlocks from xfs_inactive. 
1016 */ 1017 if (ip) 1018 IRELE(ip); 1019 1020 xfs_qm_dqrele(udqp); 1021 xfs_qm_dqrele(gdqp); 1022 1023 if (unlock_dp_on_error) 1024 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1025 return error; 1026 } 1027 1028 #ifdef DEBUG 1029 int xfs_locked_n; 1030 int xfs_small_retries; 1031 int xfs_middle_retries; 1032 int xfs_lots_retries; 1033 int xfs_lock_delays; 1034 #endif 1035 1036 /* 1037 * Bump the subclass so xfs_lock_inodes() acquires each lock with 1038 * a different value 1039 */ 1040 static inline int xfs_lock_inumorder(int lock_mode,int subclass)1041 xfs_lock_inumorder(int lock_mode, int subclass) 1042 { 1043 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 1044 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 1045 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 1046 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 1047 1048 return lock_mode; 1049 } 1050 1051 /* 1052 * The following routine will lock n inodes in exclusive mode. 1053 * We assume the caller calls us with the inodes in i_ino order. 1054 * 1055 * We need to detect deadlock where an inode that we lock 1056 * is in the AIL and we start waiting for another inode that is locked 1057 * by a thread in a long running transaction (such as truncate). This can 1058 * result in deadlock since the long running trans might need to wait 1059 * for the inode we just locked in order to push the tail and free space 1060 * in the log. 
1061 */ 1062 void xfs_lock_inodes(xfs_inode_t ** ips,int inodes,uint lock_mode)1063 xfs_lock_inodes( 1064 xfs_inode_t **ips, 1065 int inodes, 1066 uint lock_mode) 1067 { 1068 int attempts = 0, i, j, try_lock; 1069 xfs_log_item_t *lp; 1070 1071 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 1072 1073 try_lock = 0; 1074 i = 0; 1075 1076 again: 1077 for (; i < inodes; i++) { 1078 ASSERT(ips[i]); 1079 1080 if (i && (ips[i] == ips[i-1])) /* Already locked */ 1081 continue; 1082 1083 /* 1084 * If try_lock is not set yet, make sure all locked inodes 1085 * are not in the AIL. 1086 * If any are, set try_lock to be used later. 1087 */ 1088 1089 if (!try_lock) { 1090 for (j = (i - 1); j >= 0 && !try_lock; j--) { 1091 lp = (xfs_log_item_t *)ips[j]->i_itemp; 1092 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 1093 try_lock++; 1094 } 1095 } 1096 } 1097 1098 /* 1099 * If any of the previous locks we have locked is in the AIL, 1100 * we must TRY to get the second and subsequent locks. If 1101 * we can't get any, we must release all we have 1102 * and try again. 1103 */ 1104 1105 if (try_lock) { 1106 /* try_lock must be 0 if i is 0. */ 1107 /* 1108 * try_lock means we have an inode locked 1109 * that is in the AIL. 1110 */ 1111 ASSERT(i != 0); 1112 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 1113 attempts++; 1114 1115 /* 1116 * Unlock all previous guys and try again. 1117 * xfs_iunlock will try to push the tail 1118 * if the inode is in the AIL. 1119 */ 1120 1121 for(j = i - 1; j >= 0; j--) { 1122 1123 /* 1124 * Check to see if we've already 1125 * unlocked this one. 1126 * Not the first one going back, 1127 * and the inode ptr is the same. 
1128 */ 1129 if ((j != (i - 1)) && ips[j] == 1130 ips[j+1]) 1131 continue; 1132 1133 xfs_iunlock(ips[j], lock_mode); 1134 } 1135 1136 if ((attempts % 5) == 0) { 1137 delay(1); /* Don't just spin the CPU */ 1138 #ifdef DEBUG 1139 xfs_lock_delays++; 1140 #endif 1141 } 1142 i = 0; 1143 try_lock = 0; 1144 goto again; 1145 } 1146 } else { 1147 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); 1148 } 1149 } 1150 1151 #ifdef DEBUG 1152 if (attempts) { 1153 if (attempts < 5) xfs_small_retries++; 1154 else if (attempts < 100) xfs_middle_retries++; 1155 else xfs_lots_retries++; 1156 } else { 1157 xfs_locked_n++; 1158 } 1159 #endif 1160 } 1161 1162 /* 1163 * xfs_lock_two_inodes() can only be used to lock one type of lock 1164 * at a time - the iolock or the ilock, but not both at once. If 1165 * we lock both at once, lockdep will report false positives saying 1166 * we have violated locking orders. 1167 */ 1168 void xfs_lock_two_inodes(xfs_inode_t * ip0,xfs_inode_t * ip1,uint lock_mode)1169 xfs_lock_two_inodes( 1170 xfs_inode_t *ip0, 1171 xfs_inode_t *ip1, 1172 uint lock_mode) 1173 { 1174 xfs_inode_t *temp; 1175 int attempts = 0; 1176 xfs_log_item_t *lp; 1177 1178 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 1179 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 1180 ASSERT(ip0->i_ino != ip1->i_ino); 1181 1182 if (ip0->i_ino > ip1->i_ino) { 1183 temp = ip0; 1184 ip0 = ip1; 1185 ip1 = temp; 1186 } 1187 1188 again: 1189 xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); 1190 1191 /* 1192 * If the first lock we have locked is in the AIL, we must TRY to get 1193 * the second lock. If we can't get it, we must release the first one 1194 * and try again. 
	 */
	lp = (xfs_log_item_t *)ip0->i_itemp;
	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
			/* drop ip0 so the tail can be pushed, then retry */
			xfs_iunlock(ip0, lock_mode);
			if ((++attempts % 5) == 0)
				delay(1); /* Don't just spin the CPU */
			goto again;
		}
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
	}
}

/*
 * Remove the directory entry @name in @dp that points to @ip, dropping the
 * corresponding link count(s).  Handles both files and directories (for a
 * directory the "." and ".." links are dropped as well, and the directory
 * must be empty).  Returns 0 or a positive XFS errno.
 */
int
xfs_remove(
	xfs_inode_t		*dp,
	struct xfs_name		*name,
	xfs_inode_t		*ip)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t		*tp = NULL;
	int			is_dir = S_ISDIR(ip->i_d.di_mode);
	int			error = 0;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			link_zero;
	uint			resblks;
	uint			log_count;

	trace_xfs_remove(dp, name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/* attach quota dquots to both inodes before starting the transaction */
	error = xfs_qm_dqattach(dp, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		goto std_return;

	if (is_dir) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
		log_count = XFS_DEFAULT_LOG_COUNT;
	} else {
		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
		log_count = XFS_REMOVE_LOG_COUNT;
	}
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;

	/*
	 * We try to get the real space reservation first,
	 * allowing for directory btree deletion(s) implying
	 * possible bmap insert(s).  If we can't get the space
	 * reservation then we use 0 instead, and avoid the bmap
	 * btree insert(s) in the directory code by, if the bmap
	 * insert tries to happen, instead trimming the LAST
	 * block from the directory.
	 */
	resblks = XFS_REMOVE_SPACE_RES(mp);
	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES, log_count);
	if (error == ENOSPC) {
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
					  XFS_TRANS_PERM_LOG_RES, log_count);
	}
	if (error) {
		ASSERT(error != ENOSPC);
		cancel_flags = 0;
		goto out_trans_cancel;
	}

	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);

	/* after ijoin the transaction owns the ilocks and unlocks on commit/cancel */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	/*
	 * If we're removing a directory perform some additional validation.
	 */
	if (is_dir) {
		ASSERT(ip->i_d.di_nlink >= 2);
		if (ip->i_d.di_nlink != 2) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
		if (!xfs_dir_isempty(ip)) {
			error = XFS_ERROR(ENOTEMPTY);
			goto out_trans_cancel;
		}
	}

	xfs_bmap_init(&free_list, &first_block);
	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error) {
		ASSERT(error != ENOENT);
		goto out_bmap_cancel;
	}
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	if (is_dir) {
		/*
		 * Drop the link from ip's "..".
		 */
		error = xfs_droplink(tp, dp);
		if (error)
			goto out_bmap_cancel;

		/*
		 * Drop the "." link from ip to self.
		 */
		error = xfs_droplink(tp, ip);
		if (error)
			goto out_bmap_cancel;
	} else {
		/*
		 * When removing a non-directory we need to log the parent
		 * inode here.  For a directory this is done implicitly
		 * by the xfs_droplink call for the ".." entry.
		 */
		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	}

	/*
	 * Drop the link from dp to ip.
	 */
	error = xfs_droplink(tp, ip);
	if (error)
		goto out_bmap_cancel;

	/*
	 * Determine if this is the last link while
	 * we are in the transaction.
	 */
	link_zero = (ip->i_d.di_nlink == 0);

	/*
	 * If this is a synchronous mount, make sure that the
	 * remove transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
		xfs_trans_set_sync(tp);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error)
		goto std_return;

	/*
	 * If we are using filestreams, kill the stream association.
	 * If the file is still open it may get a new one but that
	 * will get killed on last close in xfs_close() so we don't
	 * have to worry about that.
	 */
	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
		xfs_filestream_deassociate(ip);

	return 0;

 out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Create a new hard link @target_name in directory @tdp pointing at the
 * existing (non-directory) inode @sip, bumping sip's link count.
 * Returns 0 or a positive XFS errno.
 */
int
xfs_link(
	xfs_inode_t		*tdp,
	xfs_inode_t		*sip,
	struct xfs_name		*target_name)
{
	xfs_mount_t		*mp = tdp->i_mount;
	xfs_trans_t		*tp;
	int			error;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	int			cancel_flags;
	int			committed;
	int			resblks;

	trace_xfs_link(tdp, target_name);

	ASSERT(!S_ISDIR(sip->i_d.di_mode));

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = xfs_qm_dqattach(sip, 0);
	if (error)
		goto std_return;

	error = xfs_qm_dqattach(tdp, 0);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	if (error == ENOSPC) {
		/* retry without a block reservation; see xfs_remove() */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);

	/*
	 * If we are using project inheritance, we only allow hard link
	 * creation in our tree when the project IDs are the same; else
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
		error = XFS_ERROR(EXDEV);
		goto error_return;
	}

	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
	if (error)
		goto error_return;

	xfs_bmap_init(&free_list, &first_block);

	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto abort_return;
	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

	error = xfs_bumplink(tp, sip);
	if (error)
		goto abort_return;

	/*
	 * If this is a synchronous mount, make sure that the
	 * link transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish (&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		goto abort_return;
	}

	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
 std_return:
	return error;
}

/*
 * Create a symbolic link @link_name in directory @dp whose target is
 * @target_path.  Short targets are stored inline in the inode data fork;
 * longer ones are written into newly allocated remote blocks (at most
 * SYMLINK_MAPS extents).  On success the new inode is returned in *ipp.
 * Returns 0 or a positive XFS errno.
 */
int
xfs_symlink(
	xfs_inode_t		*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	umode_t			mode,
	xfs_inode_t		**ipp)
{
	xfs_mount_t		*mp = dp->i_mount;
	xfs_trans_t		*tp;
	xfs_inode_t		*ip;
	int			error;
	int			pathlen;
	xfs_bmap_free_t		free_list;
	xfs_fsblock_t		first_block;
	boolean_t		unlock_dp_on_error = B_FALSE;
	uint			cancel_flags;
	int			committed;
	xfs_fileoff_t		first_fsb;
	xfs_filblks_t		fs_blocks;
	int			nmaps;
	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
	xfs_daddr_t		d;
	const char		*cur_chunk;
	int			byte_cnt;
	int			n;
	xfs_buf_t		*bp;
	prid_t			prid;
	struct xfs_dquot	*udqp, *gdqp;
	uint			resblks;

	*ipp = NULL;
	error = 0;
	ip = NULL;
	tp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)      /* total string too long */
		return XFS_ERROR(ENAMETOOLONG);

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp))
		fs_blocks = 0;
	else
		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	if (error == ENOSPC && fs_blocks == 0) {
		/* inline symlink needs no data blocks; retry reservation-free */
		resblks = 0;
		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = B_TRUE;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	error = xfs_dir_canenter(tp, dp, link_name, resblks);
	if (error)
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
			       prid, resblks > 0, &ip, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}

	/*
	 * An error after we've joined dp to the transaction will result in the
	 * transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = B_FALSE;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);

	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

	} else {
		/* remote symlink: allocate blocks and copy the path in chunks */
		first_fsb = 0;
		nmaps = SYMLINK_MAPS;

		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_METADATA, &first_block, resblks,
				  mval, &nmaps, &free_list);
		if (error)
			goto error2;

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		cur_chunk = target_path;
		for (n = 0; n < nmaps; n++) {
			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			if (!bp) {
				error = ENOMEM;
				goto error2;
			}
			if (pathlen < byte_cnt) {
				byte_cnt = pathlen;
			}
			pathlen -= byte_cnt;

			memcpy(bp->b_addr, cur_chunk, byte_cnt);
			cur_chunk += byte_cnt;

			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
		}
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto error2;
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
1685 */ 1686 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1687 xfs_trans_set_sync(tp); 1688 } 1689 1690 error = xfs_bmap_finish(&tp, &free_list, &committed); 1691 if (error) { 1692 goto error2; 1693 } 1694 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1695 xfs_qm_dqrele(udqp); 1696 xfs_qm_dqrele(gdqp); 1697 1698 *ipp = ip; 1699 return 0; 1700 1701 error2: 1702 IRELE(ip); 1703 error1: 1704 xfs_bmap_cancel(&free_list); 1705 cancel_flags |= XFS_TRANS_ABORT; 1706 error_return: 1707 xfs_trans_cancel(tp, cancel_flags); 1708 xfs_qm_dqrele(udqp); 1709 xfs_qm_dqrele(gdqp); 1710 1711 if (unlock_dp_on_error) 1712 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1713 std_return: 1714 return error; 1715 } 1716 1717 int xfs_set_dmattrs(xfs_inode_t * ip,u_int evmask,u_int16_t state)1718 xfs_set_dmattrs( 1719 xfs_inode_t *ip, 1720 u_int evmask, 1721 u_int16_t state) 1722 { 1723 xfs_mount_t *mp = ip->i_mount; 1724 xfs_trans_t *tp; 1725 int error; 1726 1727 if (!capable(CAP_SYS_ADMIN)) 1728 return XFS_ERROR(EPERM); 1729 1730 if (XFS_FORCED_SHUTDOWN(mp)) 1731 return XFS_ERROR(EIO); 1732 1733 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 1734 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0); 1735 if (error) { 1736 xfs_trans_cancel(tp, 0); 1737 return error; 1738 } 1739 xfs_ilock(ip, XFS_ILOCK_EXCL); 1740 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1741 1742 ip->i_d.di_dmevmask = evmask; 1743 ip->i_d.di_dmstate = state; 1744 1745 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1746 error = xfs_trans_commit(tp, 0); 1747 1748 return error; 1749 } 1750 1751 /* 1752 * xfs_alloc_file_space() 1753 * This routine allocates disk space for the given file. 1754 * 1755 * If alloc_type == 0, this request is for an ALLOCSP type 1756 * request which will change the file size. In this case, no 1757 * DMAPI event will be generated by the call. A TRUNCATE event 1758 * will be generated later by xfs_setattr. 
 *
 *	If alloc_type != 0, this request is for a RESVSP type
 *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
 *	lower block boundary byte address is less than the file's
 *	length.
 *
 * RETURNS:
 *       0 on success
 *      errno on error
 *
 */
STATIC int
xfs_alloc_file_space(
	xfs_inode_t		*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type,
	int			attr_flags)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	xfs_bmap_free_t		free_list;
	uint			qblocks, resblks, resrtextents;
	int			committed;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)
		return XFS_ERROR(EINVAL);

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			/* round [s, e) out to the extent-size-hint boundaries */
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			if ((temp = do_mod(startoffset_fsb, extsz)))
				e += temp;
			if ((temp = do_mod(e, extsz)))
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
		error = xfs_trans_reserve(tp, resblks,
					  XFS_WRITE_LOG_RES(mp), resrtextents,
					  XFS_TRANS_PERM_LOG_RES,
					  XFS_WRITE_LOG_COUNT);
		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		xfs_bmap_init(&free_list, &firstfsb);
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, &firstfsb,
					0, imapp, &nimaps, &free_list);
		if (error) {
			goto error0;
		}

		/*
		 * Complete the transaction
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error) {
			goto error0;
		}

		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error) {
			break;
		}

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = XFS_ERROR(ENOSPC);
			break;
		}

		/* advance past what this pass mapped and loop for the rest */
		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_bmap_cancel(&free_list);
	xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Zero file bytes between startoff and endoff inclusive.
 * The iolock is held exclusive and no blocks are buffered.
 *
 * This function is used by xfs_free_file_space() to zero
 * partial blocks when the range to free is not block aligned.
 * When unreserving space with boundaries that are not block
 * aligned we round up the start and round down the end
 * boundaries and then use this function to zero the parts of
 * the blocks that got dropped during the rounding.
 */
STATIC int
xfs_zero_remaining_bytes(
	xfs_inode_t		*ip,
	xfs_off_t		startoff,
	xfs_off_t		endoff)
{
	xfs_bmbt_irec_t		imap;
	xfs_fileoff_t		offset_fsb;
	xfs_off_t		lastoffset;
	xfs_off_t		offset;
	xfs_buf_t		*bp;
	xfs_mount_t		*mp = ip->i_mount;
	int			nimap;
	int			error = 0;

	/*
	 * Avoid doing I/O beyond eof - it's not necessary
	 * since nothing can read beyond eof.  The space will
	 * be zeroed when the file is extended anyway.
	 */
	if (startoff >= XFS_ISIZE(ip))
		return 0;

	if (endoff > XFS_ISIZE(ip))
		endoff = XFS_ISIZE(ip);

	/* one-block scratch buffer for read-modify-write of partial blocks */
	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp,
				mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
	if (!bp)
		return XFS_ERROR(ENOMEM);

	xfs_buf_unlock(bp);

	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
		offset_fsb = XFS_B_TO_FSBT(mp, offset);
		nimap = 1;
		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
		if (error || nimap < 1)
			break;
		ASSERT(imap.br_blockcount >= 1);
		ASSERT(imap.br_startoff == offset_fsb);
		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
		if (lastoffset > endoff)
			lastoffset = endoff;
		/* holes and unwritten extents already read back as zeroes */
		if (imap.br_startblock == HOLESTARTBLOCK)
			continue;
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		if (imap.br_state == XFS_EXT_UNWRITTEN)
			continue;
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNWRITE(bp);
		XFS_BUF_READ(bp);
		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
		xfsbdstrat(mp, bp);
		error = xfs_buf_iowait(bp);
		if (error) {
			xfs_buf_ioerror_alert(bp,
					"xfs_zero_remaining_bytes(read)");
			break;
		}
		memset(bp->b_addr +
			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
		      0, lastoffset - offset + 1);
		/* write the modified block back out */
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNREAD(bp);
		XFS_BUF_WRITE(bp);
		xfsbdstrat(mp, bp);
		error = xfs_buf_iowait(bp);
		if (error) {
			xfs_buf_ioerror_alert(bp,
					"xfs_zero_remaining_bytes(write)");
			break;
		}
	}
	xfs_buf_free(bp);
	return error;
}

/*
 * xfs_free_file_space()
 *      This routine frees disk space for the given file.
 *
 *	This routine is only called by xfs_change_file_space
 *	for an UNRESVSP type call.
 *
 * RETURNS:
 *       0 on success
 *      errno on error
 *
 */
STATIC int
xfs_free_file_space(
	xfs_inode_t		*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			attr_flags)
{
	int			committed;
	int			done;
	xfs_fileoff_t		endoffset_fsb;
	int			error;
	xfs_fsblock_t		firstfsb;
	xfs_bmap_free_t		free_list;
	xfs_bmbt_irec_t		imap;
	xfs_off_t		ioffset;
	xfs_extlen_t		mod=0;
	xfs_mount_t		*mp;
	int			nimap;
	uint			resblks;
	uint			rounding;
	int			rt;
	xfs_fileoff_t		startoffset_fsb;
	xfs_trans_t		*tp;
	int			need_iolock = 1;

	mp = ip->i_mount;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	error = 0;
	if (len <= 0)	/* if nothing being freed */
		return error;
	rt = XFS_IS_REALTIME_INODE(ip);
	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* XFS_ATTR_NOLOCK means the caller already holds the iolock */
	if (attr_flags & XFS_ATTR_NOLOCK)
		need_iolock = 0;
	if (need_iolock) {
		xfs_ilock(ip, XFS_IOLOCK_EXCL);
		/* wait for the completion of any pending DIOs */
		inode_dio_wait(VFS_I(ip));
	}

	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
	ioffset = offset & ~(rounding - 1);

	if (VN_CACHED(VFS_I(ip)) != 0) {
		/* flush and invalidate cached pages over the punched range */
		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
		if (error)
			goto out_unlock_iolock;
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 * If it's a realtime file & can't use unwritten extents then we
	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
	 * will take care of it for us.
	 */
	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
		nimap = 1;
		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
					&imap, &nimap, 0);
		if (error)
			goto out_unlock_iolock;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			xfs_daddr_t	block;

			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			block = imap.br_startblock;
			mod = do_div(block, mp->m_sb.sb_rextsize);
			if (mod)
				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
		}
		nimap = 1;
		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
					&imap, &nimap, 0);
		if (error)
			goto out_unlock_iolock;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			mod++;
			if (mod && (mod != mp->m_sb.sb_rextsize))
				endoffset_fsb -= mod;
		}
	}
	if ((done = (endoffset_fsb <= startoffset_fsb)))
		/*
		 * One contiguous piece to clear
		 */
		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
	else {
		/*
		 * Some full blocks, possibly two pieces to clear
		 */
		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
			error = xfs_zero_remaining_bytes(ip, offset,
				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
		if (!error &&
		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
			error = xfs_zero_remaining_bytes(ip,
				XFS_FSB_TO_B(mp, endoffset_fsb),
				offset + len - 1);
	}

	/*
	 * free file space until done or until there is an error
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	while (!error && !done) {

		/*
2150 * allocate and setup the transaction. Allow this 2151 * transaction to dip into the reserve blocks to ensure 2152 * the freeing of the space succeeds at ENOSPC. 2153 */ 2154 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 2155 tp->t_flags |= XFS_TRANS_RESERVE; 2156 error = xfs_trans_reserve(tp, 2157 resblks, 2158 XFS_WRITE_LOG_RES(mp), 2159 0, 2160 XFS_TRANS_PERM_LOG_RES, 2161 XFS_WRITE_LOG_COUNT); 2162 2163 /* 2164 * check for running out of space 2165 */ 2166 if (error) { 2167 /* 2168 * Free the transaction structure. 2169 */ 2170 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 2171 xfs_trans_cancel(tp, 0); 2172 break; 2173 } 2174 xfs_ilock(ip, XFS_ILOCK_EXCL); 2175 error = xfs_trans_reserve_quota(tp, mp, 2176 ip->i_udquot, ip->i_gdquot, 2177 resblks, 0, XFS_QMOPT_RES_REGBLKS); 2178 if (error) 2179 goto error1; 2180 2181 xfs_trans_ijoin(tp, ip, 0); 2182 2183 /* 2184 * issue the bunmapi() call to free the blocks 2185 */ 2186 xfs_bmap_init(&free_list, &firstfsb); 2187 error = xfs_bunmapi(tp, ip, startoffset_fsb, 2188 endoffset_fsb - startoffset_fsb, 2189 0, 2, &firstfsb, &free_list, &done); 2190 if (error) { 2191 goto error0; 2192 } 2193 2194 /* 2195 * complete the transaction 2196 */ 2197 error = xfs_bmap_finish(&tp, &free_list, &committed); 2198 if (error) { 2199 goto error0; 2200 } 2201 2202 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2203 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2204 } 2205 2206 out_unlock_iolock: 2207 if (need_iolock) 2208 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 2209 return error; 2210 2211 error0: 2212 xfs_bmap_cancel(&free_list); 2213 error1: 2214 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 2215 xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 2216 XFS_ILOCK_EXCL); 2217 return error; 2218 } 2219 2220 /* 2221 * xfs_change_file_space() 2222 * This routine allocates or frees disk space for the given file. 2223 * The user specified parameters are checked for alignment and size 2224 * limitations. 
 *
 * RETURNS:
 *       0 on success
 *      errno on error
 *
 */
int
xfs_change_file_space(
	xfs_inode_t	*ip,
	int		cmd,
	xfs_flock64_t	*bf,
	xfs_off_t	offset,
	int		attr_flags)
{
	xfs_mount_t	*mp = ip->i_mount;
	int		clrprealloc;
	int		error;
	xfs_fsize_t	fsize;
	int		setprealloc;
	xfs_off_t	startoffset;
	xfs_off_t	llen;
	xfs_trans_t	*tp;
	struct iattr	iattr;
	int		prealloc_type;

	if (!S_ISREG(ip->i_d.di_mode))
		return XFS_ERROR(EINVAL);

	/* normalize l_start to an absolute file offset */
	switch (bf->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		bf->l_start += offset;
		break;
	case 2: /*SEEK_END*/
		bf->l_start += XFS_ISIZE(ip);
		break;
	default:
		return XFS_ERROR(EINVAL);
	}

	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;

	if (   (bf->l_start < 0)
	    || (bf->l_start > XFS_MAXIOFFSET(mp))
	    || (bf->l_start + llen < 0)
	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
		return XFS_ERROR(EINVAL);

	bf->l_whence = 0;

	startoffset = bf->l_start;
	fsize = XFS_ISIZE(ip);

	/*
	 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
	 * file space.
	 * These calls do NOT zero the data space allocated to the file,
	 * nor do they change the file size.
	 *
	 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
	 * space.
	 * These calls cause the new file data to be zeroed and the file
	 * size to be changed.
	 */
	setprealloc = clrprealloc = 0;
	prealloc_type = XFS_BMAPI_PREALLOC;

	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
		prealloc_type |= XFS_BMAPI_CONVERT;
		xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
		/* FALLTHRU */
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
						prealloc_type, attr_flags);
		if (error)
			return error;
		setprealloc = 1;
		break;

	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
						attr_flags)))
			return error;
		break;

	case XFS_IOC_ALLOCSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP:
	case XFS_IOC_FREESP64:
		/* growing: allocate and zero the new range before resizing */
		if (startoffset > fsize) {
			error = xfs_alloc_file_space(ip, fsize,
					startoffset - fsize, 0, attr_flags);
			if (error)
				break;
		}

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = startoffset;

		error = xfs_setattr_size(ip, &iattr, attr_flags);

		if (error)
			return error;

		clrprealloc = 1;
		break;

	default:
		ASSERT(0);
		return XFS_ERROR(EINVAL);
	}

	/*
	 * update the inode timestamp, mode, and prealloc flag bits
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);

	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
				      0, 0, 0))) {
		/* ASSERT(0); */
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if ((attr_flags & XFS_ATTR_DMI) == 0) {
		/* strip setuid/setgid on space changes, like a write would */
		ip->i_d.di_mode &= ~S_ISUID;

		/*
		 * Note that we don't have to worry about mandatory
		 * file locking being disabled here because we only
		 * clear the S_ISGID bit if the Group execute bit is
		 * on, but if it was on then mandatory locking wouldn't
		 * have been enabled.
		 */
		if (ip->i_d.di_mode & S_IXGRP)
			ip->i_d.di_mode &= ~S_ISGID;

		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}
	if (setprealloc)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	else if (clrprealloc)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (attr_flags & XFS_ATTR_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp, 0);
}