1 // SPDX-License-Identifier: GPL-2.0-or-later
10 - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar
11 - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com>
12 - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
13 - kerneld support by Boris Tobotras <boris@xtalk.msk.su>
14 - kmod support by: Cyrus Durgin
15 - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>
16 - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>
18 - lots of fixes and improvements to the RAID1/RAID5 and generic
23 - persistent bitmap code
24 Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
63 #include <linux/percpu-refcount.h>
68 #include "md-bitmap.h"
69 #include "md-cluster.h"
74 * mddev->thread when the mutex cannot be held.
103 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
108 * speed limit - in case reconstruction slows down your system despite
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
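/*
 * Illustrative usage (values are KiB/s): the per-array knobs override the
 * global sysctls only when non-zero, e.g.
 *
 *   echo 50000  > /sys/block/md0/md/sync_speed_min
 *   echo 200000 > /sys/block/md0/md/sync_speed_max
 *
 * otherwise the helpers above fall back to
 * /proc/sys/dev/raid/speed_limit_{min,max}.
 */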
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
152 if (test_bit(CollisionCheck, &rdev->flags))
158 return -ENOMEM;
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
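/*
 * Worked example for the sizing above: with 4 KiB pages (PAGE_SHIFT == 12)
 * and a 4-byte atomic_t, serial_nums = 1 << (12 - 2) = 1024, i.e.
 * PAGE_SIZE / sizeof(atomic_t) buckets in the per-rdev serial table.
 */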
186 if (ret && !mddev->serial_info_pool)
194 * 1. it is a multi-queue device flagged with writemostly.
195 * 2. the write-behind mode is enabled.
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
215 !test_bit(CollisionCheck, &rdev->flags))
228 if (mddev->serial_info_pool == NULL) {
233 mddev->serial_info_pool =
236 if (!mddev->serial_info_pool) {
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
259 if (mddev->serial_info_pool) {
267 if (!mddev->serialize_policy ||
273 test_bit(CollisionCheck, &temp->flags))
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
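/*
 * Illustrative: the pool torn down above backs overlapping-write
 * serialization; on kernels exposing the attribute it can be toggled per
 * array with
 *   echo 1 > /sys/block/md0/md/serialize_policy
 */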
335 * a device node in /dev and to open it. This causes races with device-close.
346 if (!mddev || !bioset_initialized(&mddev->bio_set))
349 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
355 if (!mddev || !bioset_initialized(&mddev->sync_set))
358 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
406 _tmp = _tmp->next;}) \
412 * We hold a refcount over the call to ->make_request. By the time that
414 * and so is visible to ->quiesce(), so we don't need the refcount any more.
418 if (mddev->suspended)
422 if (mddev->suspend_lo >= mddev->suspend_hi)
424 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
426 if (bio_end_sector(bio) < mddev->suspend_lo)
438 prepare_to_wait(&mddev->sb_wait, &__wait,
446 finish_wait(&mddev->sb_wait, &__wait);
448 atomic_inc(&mddev->active_io);
451 if (!mddev->pers->make_request(mddev, bio)) {
452 atomic_dec(&mddev->active_io);
453 wake_up(&mddev->sb_wait);
457 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
458 wake_up(&mddev->sb_wait);
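/*
 * Protocol sketch: each bio holds mddev->active_io across ->make_request(),
 * and mddev_suspend() below waits for active_io to reach zero, so once it
 * returns no request can still be inside the personality.  A 'false' return
 * from ->make_request() drops the count and the bio is retried after the
 * suspend state is re-checked.
 */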
465 struct mddev *mddev = bio->bi_disk->private_data;
467 if (mddev == NULL || mddev->pers == NULL) {
472 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
479 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
481 bio->bi_status = BLK_STS_IOERR;
487 bio->bi_opf &= ~REQ_NOMERGE;
502 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
503 lockdep_assert_held(&mddev->reconfig_mutex);
504 if (mddev->suspended++)
507 wake_up(&mddev->sb_wait);
508 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
510 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
511 mddev->pers->quiesce(mddev, 1);
512 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
513 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
515 del_timer_sync(&mddev->safemode_timer);
517 mddev->noio_flag = memalloc_noio_save();
524 memalloc_noio_restore(mddev->noio_flag);
525 lockdep_assert_held(&mddev->reconfig_mutex);
526 if (--mddev->suspended)
528 wake_up(&mddev->sb_wait);
529 mddev->pers->quiesce(mddev, 0);
531 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
532 md_wakeup_thread(mddev->thread);
533 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
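/*
 * Suspend/resume nest through the ->suspended counter: only the 0 -> 1
 * transition in mddev_suspend() quiesces (drain active_io, then
 * ->quiesce(mddev, 1)), and only the final mddev_resume() unquiesces and
 * kicks the recovery machinery above.
 */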
543 struct md_rdev *rdev = bio->bi_private;
544 struct mddev *mddev = rdev->mddev;
550 if (atomic_dec_and_test(&mddev->flush_pending)) {
551 /* The pre-request flush has finished */
552 queue_work(md_wq, &mddev->flush_work);
563 mddev->start_flush = ktime_get_boottime();
564 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
565 atomic_set(&mddev->flush_pending, 1);
568 if (rdev->raid_disk >= 0 &&
569 !test_bit(Faulty, &rdev->flags)) {
575 atomic_inc(&rdev->nr_pending);
576 atomic_inc(&rdev->nr_pending);
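			/*
			 * Two references on purpose: one is dropped when the
			 * flush bio below completes, the other once the RCU
			 * read section that found this rdev is left (handled
			 * on lines elided from this listing).
			 */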
579 bi->bi_end_io = md_end_flush;
580 bi->bi_private = rdev;
581 bio_set_dev(bi, rdev->bdev);
582 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
583 atomic_inc(&mddev->flush_pending);
589 if (atomic_dec_and_test(&mddev->flush_pending))
590 queue_work(md_wq, &mddev->flush_work);
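	/*
	 * Note the bias: flush_pending starts at 1 (set above), so
	 * md_submit_flush_data can only be queued after every per-rdev
	 * flush bio has completed *and* this final dec_and_test runs.
	 */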
596 struct bio *bio = mddev->flush_bio;
604 spin_lock_irq(&mddev->lock);
605 mddev->last_flush = mddev->start_flush;
606 mddev->flush_bio = NULL;
607 spin_unlock_irq(&mddev->lock);
608 wake_up(&mddev->sb_wait);
610 if (bio->bi_iter.bi_size == 0) {
611 /* an empty barrier - all done */
614 bio->bi_opf &= ~REQ_PREFLUSH;
628 spin_lock_irq(&mddev->lock);
629 wait_event_lock_irq(mddev->sb_wait,
630 !mddev->flush_bio ||
631 ktime_after(mddev->last_flush, start),
632 mddev->lock);
633 if (!ktime_after(mddev->last_flush, start)) {
634 WARN_ON(mddev->flush_bio);
635 mddev->flush_bio = bio;
638 spin_unlock_irq(&mddev->lock);
641 INIT_WORK(&mddev->flush_work, submit_flushes);
642 queue_work(md_wq, &mddev->flush_work);
645 if (bio->bi_iter.bi_size == 0)
646 /* an empty barrier - all done */
649 bio->bi_opf &= ~REQ_PREFLUSH;
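/*
 * Sketch of the optimization above: if another flush completed after this
 * request arrived (last_flush newer than start), its device-level flushes
 * already cover our data, so no new PREFLUSH is issued; an empty barrier
 * completes immediately and anything else proceeds with REQ_PREFLUSH
 * stripped.
 */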
659 atomic_inc(&mddev->active);
667 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
669 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
670 mddev->ctime == 0 && !mddev->hold_active) {
673 list_del_init(&mddev->all_mddevs);
680 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
681 queue_work(md_misc_wq, &mddev->del_work);
690 kobject_init(&mddev->kobj, &md_ktype);
691 mutex_init(&mddev->open_mutex);
692 mutex_init(&mddev->reconfig_mutex);
693 mutex_init(&mddev->bitmap_info.mutex);
694 INIT_LIST_HEAD(&mddev->disks);
695 INIT_LIST_HEAD(&mddev->all_mddevs);
696 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
697 atomic_set(&mddev->active, 1);
698 atomic_set(&mddev->openers, 0);
699 atomic_set(&mddev->active_io, 0);
700 spin_lock_init(&mddev->lock);
701 atomic_set(&mddev->flush_pending, 0);
702 init_waitqueue_head(&mddev->sb_wait);
703 init_waitqueue_head(&mddev->recovery_wait);
704 mddev->reshape_position = MaxSector;
705 mddev->reshape_backwards = 0;
706 mddev->last_sync_action = "none";
707 mddev->resync_min = 0;
708 mddev->resync_max = MaxSector;
709 mddev->level = LEVEL_NONE;
718 if (mddev->unit == unit)
729 unit &= ~((1 << MdpMinorShift) - 1);
745 unit &= ~((1<<MdpMinorShift)-1);
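/*
 * Worked example: MdpMinorShift is 6, so a partitionable mdp device owns a
 * block of 1 << 6 == 64 minors; masking with ~63 maps any minor in the
 * block back to its base unit (e.g. minor 70 -> 64).
 */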
760 list_add(&new->all_mddevs, &all_mddevs);
762 new->hold_active = UNTIL_IOCTL;
785 new->unit = dev;
786 new->md_minor = MINOR(dev);
787 new->hold_active = UNTIL_STOP;
788 list_add(&new->all_mddevs, &all_mddevs);
798 new->unit = unit;
800 new->md_minor = MINOR(unit);
802 new->md_minor = MINOR(unit) >> MdpMinorShift;
813 if (mddev->to_remove) {
819 * and anything else which might set ->to_remove or may
821 * -EBUSY if sysfs_active is still set.
826 struct attribute_group *to_remove = mddev->to_remove;
827 mddev->to_remove = NULL;
828 mddev->sysfs_active = 1;
829 mutex_unlock(&mddev->reconfig_mutex);
831 if (mddev->kobj.sd) {
833 sysfs_remove_group(&mddev->kobj, to_remove);
834 if (mddev->pers == NULL ||
835 mddev->pers->sync_request == NULL) {
836 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
837 if (mddev->sysfs_action)
838 sysfs_put(mddev->sysfs_action);
839 if (mddev->sysfs_completed)
840 sysfs_put(mddev->sysfs_completed);
841 if (mddev->sysfs_degraded)
842 sysfs_put(mddev->sysfs_degraded);
843 mddev->sysfs_action = NULL;
844 mddev->sysfs_completed = NULL;
845 mddev->sysfs_degraded = NULL;
848 mddev->sysfs_active = 0;
850 mutex_unlock(&mddev->reconfig_mutex);
856 md_wakeup_thread(mddev->thread);
857 wake_up(&mddev->sb_wait);
867 if (rdev->desc_nr == nr)
879 if (rdev->bdev->bd_dev == dev)
890 if (rdev->bdev->bd_dev == dev)
901 if (level != LEVEL_NONE && pers->level == level)
903 if (strcmp(pers->name, clevel)==0)
912 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
918 rdev->sb_page = alloc_page(GFP_KERNEL);
919 if (!rdev->sb_page)
920 return -ENOMEM;
926 if (rdev->sb_page) {
927 put_page(rdev->sb_page);
928 rdev->sb_loaded = 0;
929 rdev->sb_page = NULL;
930 rdev->sb_start = 0;
931 rdev->sectors = 0;
933 if (rdev->bb_page) {
934 put_page(rdev->bb_page);
935 rdev->bb_page = NULL;
937 badblocks_exit(&rdev->badblocks);
943 struct md_rdev *rdev = bio->bi_private;
944 struct mddev *mddev = rdev->mddev;
946 if (bio->bi_status) {
948 blk_status_to_errno(bio->bi_status));
950 if (!test_bit(Faulty, &rdev->flags)
951 && (bio->bi_opf & MD_FAILFAST)) {
952 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
953 set_bit(LastDev, &rdev->flags);
956 clear_bit(LastDev, &rdev->flags);
962 if (atomic_dec_and_test(&mddev->pending_writes))
963 wake_up(&mddev->sb_wait);
970 * Increment mddev->pending_writes before returning
981 if (test_bit(Faulty, &rdev->flags))
986 atomic_inc(&rdev->nr_pending);
988 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
989 bio->bi_iter.bi_sector = sector;
991 bio->bi_private = rdev;
992 bio->bi_end_io = super_written;
994 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
995 test_bit(FailFast, &rdev->flags) &&
996 !test_bit(LastDev, &rdev->flags))
998 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
1000 atomic_inc(&mddev->pending_writes);
1007 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1008 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
1009 return -EAGAIN;
1016 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
1019 if (metadata_op && rdev->meta_bdev)
1020 bio_set_dev(bio, rdev->meta_bdev);
1022 bio_set_dev(bio, rdev->bdev);
1025 bio->bi_iter.bi_sector = sector + rdev->sb_start;
1026 else if (rdev->mddev->reshape_position != MaxSector &&
1027 (rdev->mddev->reshape_backwards ==
1028 (sector >= rdev->mddev->reshape_position)))
1029 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
1031 bio->bi_iter.bi_sector = sector + rdev->data_offset;
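/*
 * Offset selection sketch: while a reshape is active, the region of the
 * array already processed lives at new_data_offset and the remainder at
 * data_offset; reshape_backwards flips which side of reshape_position
 * counts as "already processed".
 */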
1036 ret = !bio->bi_status;
1046 if (rdev->sb_loaded)
1049 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1051 rdev->sb_loaded = 1;
1056 bdevname(rdev->bdev,b));
1057 return -EINVAL;
1062 return sb1->set_uuid0 == sb2->set_uuid0 &&
1063 sb1->set_uuid1 == sb2->set_uuid1 &&
1064 sb1->set_uuid2 == sb2->set_uuid2 &&
1065 sb1->set_uuid3 == sb2->set_uuid3;
1087 tmp1->nr_disks = 0;
1088 tmp2->nr_disks = 0;
1110 disk_csum = sb->sb_csum;
1111 sb->sb_csum = 0;
1126 sb->sb_csum = md_csum_fold(disk_csum);
1128 sb->sb_csum = disk_csum;
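/*
 * Why the fold above: historic v0.90 checksums (csum_partial) were not
 * consistent across architectures, so comparisons use a 16-bit fold of the
 * sum (md_csum_fold()) rather than the raw 32-bit value.
 */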
1138 * We rely on user-space to write the initial superblock, and support
1145 * 0 - dev has a superblock that is compatible with refdev
1146 * 1 - dev has a superblock that is compatible and newer than refdev
1148 * -EINVAL superblock incompatible or invalid
1149 * -othererror e.g. -EIO
1153 * The first time, mddev->raid_disks will be 0, and data from
1155 * is new enough. Return 0 or -EINVAL
1184 * support bitmaps. It prints an error message and returns non-zero if mddev
1190 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1193 mdname(mddev), mddev->pers->name);
1214 rdev->sb_start = calc_dev_sboffset(rdev);
1220 ret = -EINVAL;
1222 bdevname(rdev->bdev, b);
1223 sb = page_address(rdev->sb_page);
1225 if (sb->md_magic != MD_SB_MAGIC) {
1230 if (sb->major_version != 0 ||
1231 sb->minor_version < 90 ||
1232 sb->minor_version > 91) {
1234 sb->major_version, sb->minor_version, b);
1238 if (sb->raid_disks <= 0)
1241 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1246 rdev->preferred_minor = sb->md_minor;
1247 rdev->data_offset = 0;
1248 rdev->new_data_offset = 0;
1249 rdev->sb_size = MD_SB_BYTES;
1250 rdev->badblocks.shift = -1;
1252 if (sb->level == LEVEL_MULTIPATH)
1253 rdev->desc_nr = -1;
1255 rdev->desc_nr = sb->this_disk.number;
1258 if (sb->level == LEVEL_MULTIPATH ||
1259 (rdev->desc_nr >= 0 &&
1260 rdev->desc_nr < MD_SB_DISKS &&
1261 sb->disks[rdev->desc_nr].state &
1272 mdp_super_t *refsb = page_address(refdev->sb_page);
1275 b, bdevname(refdev->bdev,b2));
1280 b, bdevname(refdev->bdev, b2));
1291 rdev->sectors = rdev->sb_start;
1296 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1297 rdev->sectors = (sector_t)(2ULL << 32) - 2;
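/*
 * Limit arithmetic: v0.90 records sizes as 32-bit KiB counts, so a
 * component tops out at 2^32 KiB == 4 TiB; (2ULL << 32) sectors is that
 * same limit (2^33 * 512 bytes), and the clamp keeps rdev->sectors just
 * under it for levels >= 1.
 */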
1299 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1301 ret = -EINVAL;
1314 mdp_super_t *sb = page_address(rdev->sb_page);
1317 rdev->raid_disk = -1;
1318 clear_bit(Faulty, &rdev->flags);
1319 clear_bit(In_sync, &rdev->flags);
1320 clear_bit(Bitmap_sync, &rdev->flags);
1321 clear_bit(WriteMostly, &rdev->flags);
1323 if (mddev->raid_disks == 0) {
1324 mddev->major_version = 0;
1325 mddev->minor_version = sb->minor_version;
1326 mddev->patch_version = sb->patch_version;
1327 mddev->external = 0;
1328 mddev->chunk_sectors = sb->chunk_size >> 9;
1329 mddev->ctime = sb->ctime;
1330 mddev->utime = sb->utime;
1331 mddev->level = sb->level;
1332 mddev->clevel[0] = 0;
1333 mddev->layout = sb->layout;
1334 mddev->raid_disks = sb->raid_disks;
1335 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1336 mddev->events = ev1;
1337 mddev->bitmap_info.offset = 0;
1338 mddev->bitmap_info.space = 0;
1340 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1341 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1342 mddev->reshape_backwards = 0;
1344 if (mddev->minor_version >= 91) {
1345 mddev->reshape_position = sb->reshape_position;
1346 mddev->delta_disks = sb->delta_disks;
1347 mddev->new_level = sb->new_level;
1348 mddev->new_layout = sb->new_layout;
1349 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1350 if (mddev->delta_disks < 0)
1351 mddev->reshape_backwards = 1;
1353 mddev->reshape_position = MaxSector;
1354 mddev->delta_disks = 0;
1355 mddev->new_level = mddev->level;
1356 mddev->new_layout = mddev->layout;
1357 mddev->new_chunk_sectors = mddev->chunk_sectors;
1359 if (mddev->level == 0)
1360 mddev->layout = -1;
1362 if (sb->state & (1<<MD_SB_CLEAN))
1363 mddev->recovery_cp = MaxSector;
1365 if (sb->events_hi == sb->cp_events_hi &&
1366 sb->events_lo == sb->cp_events_lo) {
1367 mddev->recovery_cp = sb->recovery_cp;
1369 mddev->recovery_cp = 0;
1372 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1373 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1374 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1375 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1377 mddev->max_disks = MD_SB_DISKS;
1379 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1380 mddev->bitmap_info.file == NULL) {
1381 mddev->bitmap_info.offset =
1382 mddev->bitmap_info.default_offset;
1383 mddev->bitmap_info.space =
1384 mddev->bitmap_info.default_space;
1387 } else if (mddev->pers == NULL) {
1391 if (sb->disks[rdev->desc_nr].state & (
1393 if (ev1 < mddev->events)
1394 return -EINVAL;
1395 } else if (mddev->bitmap) {
1399 if (ev1 < mddev->bitmap->events_cleared)
1401 if (ev1 < mddev->events)
1402 set_bit(Bitmap_sync, &rdev->flags);
1404 if (ev1 < mddev->events)
1405 /* just a hot-add of a new device, leave raid_disk at -1 */
1409 if (mddev->level != LEVEL_MULTIPATH) {
1410 desc = sb->disks + rdev->desc_nr;
1412 if (desc->state & (1<<MD_DISK_FAULTY))
1413 set_bit(Faulty, &rdev->flags);
1414 else if (desc->state & (1<<MD_DISK_SYNC) /* &&
1415 desc->raid_disk < mddev->raid_disks */) {
1416 set_bit(In_sync, &rdev->flags);
1417 rdev->raid_disk = desc->raid_disk;
1418 rdev->saved_raid_disk = desc->raid_disk;
1419 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1423 if (mddev->minor_version >= 91) {
1424 rdev->recovery_offset = 0;
1425 rdev->raid_disk = desc->raid_disk;
1428 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1429 set_bit(WriteMostly, &rdev->flags);
1430 if (desc->state & (1<<MD_DISK_FAILFAST))
1431 set_bit(FailFast, &rdev->flags);
1433 set_bit(In_sync, &rdev->flags);
1444 int next_spare = mddev->raid_disks;
1446 /* make rdev->sb match mddev data..
1459 rdev->sb_size = MD_SB_BYTES;
1461 sb = page_address(rdev->sb_page);
1465 sb->md_magic = MD_SB_MAGIC;
1466 sb->major_version = mddev->major_version;
1467 sb->patch_version = mddev->patch_version;
1468 sb->gvalid_words = 0; /* ignored */
1469 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1470 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1471 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1472 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1474 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1475 sb->level = mddev->level;
1476 sb->size = mddev->dev_sectors / 2;
1477 sb->raid_disks = mddev->raid_disks;
1478 sb->md_minor = mddev->md_minor;
1479 sb->not_persistent = 0;
1480 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1481 sb->state = 0;
1482 sb->events_hi = (mddev->events>>32);
1483 sb->events_lo = (u32)mddev->events;
1485 if (mddev->reshape_position == MaxSector)
1486 sb->minor_version = 90;
1488 sb->minor_version = 91;
1489 sb->reshape_position = mddev->reshape_position;
1490 sb->new_level = mddev->new_level;
1491 sb->delta_disks = mddev->delta_disks;
1492 sb->new_layout = mddev->new_layout;
1493 sb->new_chunk = mddev->new_chunk_sectors << 9;
1495 mddev->minor_version = sb->minor_version;
1496 if (mddev->in_sync)
1498 sb->recovery_cp = mddev->recovery_cp;
1499 sb->cp_events_hi = (mddev->events>>32);
1500 sb->cp_events_lo = (u32)mddev->events;
1501 if (mddev->recovery_cp == MaxSector)
1502 sb->state = (1<< MD_SB_CLEAN);
1504 sb->recovery_cp = 0;
1506 sb->layout = mddev->layout;
1507 sb->chunk_size = mddev->chunk_sectors << 9;
1509 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1510 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1512 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1516 int is_active = test_bit(In_sync, &rdev2->flags);
1518 if (rdev2->raid_disk >= 0 &&
1519 sb->minor_version >= 91)
1522 * we can piggy-back on that.
1525 if (rdev2->raid_disk < 0 ||
1526 test_bit(Faulty, &rdev2->flags))
1529 desc_nr = rdev2->raid_disk;
1532 rdev2->desc_nr = desc_nr;
1533 d = &sb->disks[rdev2->desc_nr];
1535 d->number = rdev2->desc_nr;
1536 d->major = MAJOR(rdev2->bdev->bd_dev);
1537 d->minor = MINOR(rdev2->bdev->bd_dev);
1539 d->raid_disk = rdev2->raid_disk;
1541 d->raid_disk = rdev2->desc_nr; /* compatibility */
1542 if (test_bit(Faulty, &rdev2->flags))
1543 d->state = (1<<MD_DISK_FAULTY);
1545 d->state = (1<<MD_DISK_ACTIVE);
1546 if (test_bit(In_sync, &rdev2->flags))
1547 d->state |= (1<<MD_DISK_SYNC);
1551 d->state = 0;
1555 if (test_bit(WriteMostly, &rdev2->flags))
1556 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1557 if (test_bit(FailFast, &rdev2->flags))
1558 d->state |= (1<<MD_DISK_FAILFAST);
1561 for (i=0 ; i < mddev->raid_disks ; i++) {
1562 mdp_disk_t *d = &sb->disks[i];
1563 if (d->state == 0 && d->number == 0) {
1564 d->number = i;
1565 d->raid_disk = i;
1566 d->state = (1<<MD_DISK_REMOVED);
1567 d->state |= (1<<MD_DISK_FAULTY);
1571 sb->nr_disks = nr_disks;
1572 sb->active_disks = active;
1573 sb->working_disks = working;
1574 sb->failed_disks = failed;
1575 sb->spare_disks = spare;
1577 sb->this_disk = sb->disks[rdev->desc_nr];
1578 sb->sb_csum = calc_sb_csum(sb);
1587 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1589 if (rdev->mddev->bitmap_info.offset)
1591 rdev->sb_start = calc_dev_sboffset(rdev);
1592 if (!num_sectors || num_sectors > rdev->sb_start)
1593 num_sectors = rdev->sb_start;
1597 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1598 num_sectors = (sector_t)(2ULL << 32) - 2;
1600 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1601 rdev->sb_page);
1602 } while (md_super_wait(rdev->mddev) < 0);
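/*
 * The do/while retries the write whenever md_super_wait() returns -EAGAIN,
 * i.e. a failfast superblock write failed and MD_SB_NEED_REWRITE was set
 * (see super_written()/md_super_wait() earlier in this listing).
 */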
1609 /* non-zero offset changes not possible with v0.90 */
1622 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1625 disk_csum = sb->sb_csum;
1626 sb->sb_csum = 0;
1628 for (; size >= 4; size -= 4)
1635 sb->sb_csum = disk_csum;
1659 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1660 sb_start -= 8*2;
1661 sb_start &= ~(sector_t)(4*2-1);
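/*
 * Placement arithmetic for v1.0 superblocks: all values are 512-byte
 * sectors, so the code backs off 8*2 sectors (8 KiB) from the end of the
 * device and rounds down to a 4 KiB boundary (4*2-sector alignment).
 */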
1670 return -EINVAL;
1672 rdev->sb_start = sb_start;
1680 sb = page_address(rdev->sb_page);
1682 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1683 sb->major_version != cpu_to_le32(1) ||
1684 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1685 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1686 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1687 return -EINVAL;
1689 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1691 bdevname(rdev->bdev,b));
1692 return -EINVAL;
1694 if (le64_to_cpu(sb->data_size) < 10) {
1696 bdevname(rdev->bdev,b));
1697 return -EINVAL;
1699 if (sb->pad0 ||
1700 sb->pad3[0] ||
1701 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1702 /* Some padding is non-zero, might be a new feature */
1703 return -EINVAL;
1705 rdev->preferred_minor = 0xffff;
1706 rdev->data_offset = le64_to_cpu(sb->data_offset);
1707 rdev->new_data_offset = rdev->data_offset;
1708 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1709 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1710 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1711 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1713 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1714 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1715 if (rdev->sb_size & bmask)
1716 rdev->sb_size = (rdev->sb_size | bmask) + 1;
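/*
 * Rounding example: max_dev == 128 gives sb_size == 128*2 + 256 == 512
 * bytes; on a 4096-byte logical-block device bmask == 4095, so
 * (512 | 4095) + 1 rounds the superblock I/O size up to 4096.
 */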
1719 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1720 return -EINVAL;
1722 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1723 return -EINVAL;
1725 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1726 rdev->desc_nr = -1;
1728 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1730 if (!rdev->bb_page) {
1731 rdev->bb_page = alloc_page(GFP_KERNEL);
1732 if (!rdev->bb_page)
1733 return -ENOMEM;
1735 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1736 rdev->badblocks.count == 0) {
1744 int sectors = le16_to_cpu(sb->bblog_size);
1746 return -EINVAL;
1747 offset = le32_to_cpu(sb->bblog_offset);
1749 return -EINVAL;
1752 rdev->bb_page, REQ_OP_READ, 0, true))
1753 return -EIO;
1754 bbp = (__le64 *)page_address(rdev->bb_page);
1755 rdev->badblocks.shift = sb->bblog_shift;
1756 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1760 sector <<= sb->bblog_shift;
1761 count <<= sb->bblog_shift;
1764 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1765 return -EINVAL;
1767 } else if (sb->bblog_offset != 0)
1768 rdev->badblocks.shift = 0;
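/*
 * Log geometry: each bad-blocks entry is a __le64, so one 512-byte log
 * sector holds 512/8 == 64 entries, hence the (sectors << (9 - 3)) loop
 * bound; each entry packs the start sector in its high bits and a count in
 * its low bits, both scaled up by bblog_shift as shown.
 */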
1770 if ((le32_to_cpu(sb->feature_map) &
1772 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1773 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1774 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1777 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1778 sb->level != 0)
1779 return -EINVAL;
1782 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1783 (rdev->desc_nr >= 0 &&
1784 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1785 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1786 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1796 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1798 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1799 sb->level != refsb->level ||
1800 sb->layout != refsb->layout ||
1801 sb->chunksize != refsb->chunksize) {
1803 bdevname(rdev->bdev,b),
1804 bdevname(refdev->bdev,b2));
1805 return -EINVAL;
1807 ev1 = le64_to_cpu(sb->events);
1808 ev2 = le64_to_cpu(refsb->events);
1816 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1817 sectors -= rdev->data_offset;
1819 sectors = rdev->sb_start;
1820 if (sectors < le64_to_cpu(sb->data_size))
1821 return -EINVAL;
1822 rdev->sectors = le64_to_cpu(sb->data_size);
1828 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1829 __u64 ev1 = le64_to_cpu(sb->events);
1831 rdev->raid_disk = -1;
1832 clear_bit(Faulty, &rdev->flags);
1833 clear_bit(In_sync, &rdev->flags);
1834 clear_bit(Bitmap_sync, &rdev->flags);
1835 clear_bit(WriteMostly, &rdev->flags);
1837 if (mddev->raid_disks == 0) {
1838 mddev->major_version = 1;
1839 mddev->patch_version = 0;
1840 mddev->external = 0;
1841 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1842 mddev->ctime = le64_to_cpu(sb->ctime);
1843 mddev->utime = le64_to_cpu(sb->utime);
1844 mddev->level = le32_to_cpu(sb->level);
1845 mddev->clevel[0] = 0;
1846 mddev->layout = le32_to_cpu(sb->layout);
1847 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1848 mddev->dev_sectors = le64_to_cpu(sb->size);
1849 mddev->events = ev1;
1850 mddev->bitmap_info.offset = 0;
1851 mddev->bitmap_info.space = 0;
1853 * using 3K - total of 4K
1855 mddev->bitmap_info.default_offset = 1024 >> 9;
1856 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1857 mddev->reshape_backwards = 0;
1859 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1860 memcpy(mddev->uuid, sb->set_uuid, 16);
1862 mddev->max_disks = (4096-256)/2;
1864 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1865 mddev->bitmap_info.file == NULL) {
1866 mddev->bitmap_info.offset =
1867 (__s32)le32_to_cpu(sb->bitmap_offset);
1873 if (mddev->minor_version > 0)
1874 mddev->bitmap_info.space = 0;
1875 else if (mddev->bitmap_info.offset > 0)
1876 mddev->bitmap_info.space =
1877 8 - mddev->bitmap_info.offset;
1879 mddev->bitmap_info.space =
1880 -mddev->bitmap_info.offset;
1883 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1884 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1885 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1886 mddev->new_level = le32_to_cpu(sb->new_level);
1887 mddev->new_layout = le32_to_cpu(sb->new_layout);
1888 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1889 if (mddev->delta_disks < 0 ||
1890 (mddev->delta_disks == 0 &&
1891 (le32_to_cpu(sb->feature_map)
1893 mddev->reshape_backwards = 1;
1895 mddev->reshape_position = MaxSector;
1896 mddev->delta_disks = 0;
1897 mddev->new_level = mddev->level;
1898 mddev->new_layout = mddev->layout;
1899 mddev->new_chunk_sectors = mddev->chunk_sectors;
1902 if (mddev->level == 0 &&
1903 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1904 mddev->layout = -1;
1906 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1907 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1909 if (le32_to_cpu(sb->feature_map) &
1911 if (le32_to_cpu(sb->feature_map) &
1913 return -EINVAL;
1914 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1915 (le32_to_cpu(sb->feature_map) &
1917 return -EINVAL;
1918 set_bit(MD_HAS_PPL, &mddev->flags);
1920 } else if (mddev->pers == NULL) {
1926 if (rdev->desc_nr >= 0 &&
1927 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1928 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1929 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1930 if (ev1 + 1 < mddev->events)
1931 return -EINVAL;
1932 } else if (mddev->bitmap) {
1936 if (ev1 < mddev->bitmap->events_cleared)
1938 if (ev1 < mddev->events)
1939 set_bit(Bitmap_sync, &rdev->flags);
1941 if (ev1 < mddev->events)
1942 /* just a hot-add of a new device, leave raid_disk at -1 */
1945 if (mddev->level != LEVEL_MULTIPATH) {
1947 if (rdev->desc_nr < 0 ||
1948 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1950 rdev->desc_nr = -1;
1951 } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
1966 struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
1967 u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
1969 if (rdev->desc_nr >= freshest_max_dev) {
1971 pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
1972 mdname(mddev), rdev->bdev, rdev->desc_nr,
1973 freshest->bdev, freshest_max_dev);
1974 return -EUCLEAN;
1977 role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
1979 mdname(mddev), rdev->bdev, role, role, freshest->bdev);
1981 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1987 set_bit(Faulty, &rdev->flags);
1990 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1993 return -EINVAL;
1995 set_bit(Journal, &rdev->flags);
1996 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1997 rdev->raid_disk = 0;
2000 rdev->saved_raid_disk = role;
2001 if ((le32_to_cpu(sb->feature_map) &
2003 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
2004 if (!(le32_to_cpu(sb->feature_map) &
2006 rdev->saved_raid_disk = -1;
2013 &mddev->recovery))
2014 set_bit(In_sync, &rdev->flags);
2016 rdev->raid_disk = role;
2019 if (sb->devflags & WriteMostly1)
2020 set_bit(WriteMostly, &rdev->flags);
2021 if (sb->devflags & FailFast1)
2022 set_bit(FailFast, &rdev->flags);
2023 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
2024 set_bit(Replacement, &rdev->flags);
2026 set_bit(In_sync, &rdev->flags);
2036 /* make rdev->sb match mddev and rdev data. */
2038 sb = page_address(rdev->sb_page);
2040 sb->feature_map = 0;
2041 sb->pad0 = 0;
2042 sb->recovery_offset = cpu_to_le64(0);
2043 memset(sb->pad3, 0, sizeof(sb->pad3));
2045 sb->utime = cpu_to_le64((__u64)mddev->utime);
2046 sb->events = cpu_to_le64(mddev->events);
2047 if (mddev->in_sync)
2048 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2049 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2050 sb->resync_offset = cpu_to_le64(MaxSector);
2052 sb->resync_offset = cpu_to_le64(0);
2054 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2056 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2057 sb->size = cpu_to_le64(mddev->dev_sectors);
2058 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2059 sb->level = cpu_to_le32(mddev->level);
2060 sb->layout = cpu_to_le32(mddev->layout);
2061 if (test_bit(FailFast, &rdev->flags))
2062 sb->devflags |= FailFast1;
2064 sb->devflags &= ~FailFast1;
2066 if (test_bit(WriteMostly, &rdev->flags))
2067 sb->devflags |= WriteMostly1;
2069 sb->devflags &= ~WriteMostly1;
2070 sb->data_offset = cpu_to_le64(rdev->data_offset);
2071 sb->data_size = cpu_to_le64(rdev->sectors);
2073 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2074 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2075 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2078 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2079 !test_bit(In_sync, &rdev->flags)) {
2080 sb->feature_map |=
2082 sb->recovery_offset =
2083 cpu_to_le64(rdev->recovery_offset);
2084 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2085 sb->feature_map |=
2089 if (test_bit(Journal, &rdev->flags))
2090 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2091 if (test_bit(Replacement, &rdev->flags))
2092 sb->feature_map |=
2095 if (mddev->reshape_position != MaxSector) {
2096 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2097 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2098 sb->new_layout = cpu_to_le32(mddev->new_layout);
2099 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2100 sb->new_level = cpu_to_le32(mddev->new_level);
2101 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2102 if (mddev->delta_disks == 0 &&
2103 mddev->reshape_backwards)
2104 sb->feature_map
2106 if (rdev->new_data_offset != rdev->data_offset) {
2107 sb->feature_map
2109 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2110 - rdev->data_offset));
2115 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2117 if (rdev->badblocks.count == 0)
2119 else if (sb->bblog_offset == 0)
2123 struct badblocks *bb = &rdev->badblocks;
2124 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2125 u64 *p = bb->page;
2126 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2127 if (bb->changed) {
2131 seq = read_seqbegin(&bb->lock);
2135 for (i = 0 ; i < bb->count ; i++) {
2141 bb->changed = 0;
2142 if (read_seqretry(&bb->lock, seq))
2145 bb->sector = (rdev->sb_start +
2146 (int)le32_to_cpu(sb->bblog_offset));
2147 bb->size = le16_to_cpu(sb->bblog_size);
2153 if (rdev2->desc_nr+1 > max_dev)
2154 max_dev = rdev2->desc_nr+1;
2156 if (max_dev > le32_to_cpu(sb->max_dev)) {
2158 sb->max_dev = cpu_to_le32(max_dev);
2159 rdev->sb_size = max_dev * 2 + 256;
2160 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2161 if (rdev->sb_size & bmask)
2162 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2164 max_dev = le32_to_cpu(sb->max_dev);
2167 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2169 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2170 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2172 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2173 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2174 sb->feature_map |=
2177 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2178 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2179 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2183 i = rdev2->desc_nr;
2184 if (test_bit(Faulty, &rdev2->flags))
2185 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2186 else if (test_bit(In_sync, &rdev2->flags))
2187 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2188 else if (test_bit(Journal, &rdev2->flags))
2189 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2190 else if (rdev2->raid_disk >= 0)
2191 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2193 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2196 sb->sb_csum = calc_sb_1_csum(sb);
2208 else if (dev_size - 64*2 >= 200*1024*1024*2)
2210 else if (dev_size - 4*2 > 8*1024*1024*2)
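/*
 * Units: dev_size is in 512-byte sectors, so 200*1024*1024*2 is 200 GiB
 * and 8*1024*1024*2 is 8 GiB; the elided branch bodies reserve
 * progressively more bitmap space (e.g. 128 KiB == 128*2 sectors for the
 * largest devices).
 */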
2222 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2224 if (rdev->data_offset != rdev->new_data_offset)
2226 if (rdev->sb_start < rdev->data_offset) {
2228 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2229 max_sectors -= rdev->data_offset;
2232 } else if (rdev->mddev->bitmap_info.offset) {
2238 sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
2241 sb_start = dev_size - 8*2;
2242 sb_start &= ~(sector_t)(4*2 - 1);
2249 max_sectors = sb_start - bm_space - 4*2;
2253 rdev->sb_start = sb_start;
2255 sb = page_address(rdev->sb_page);
2256 sb->data_size = cpu_to_le64(num_sectors);
2257 sb->super_offset = cpu_to_le64(rdev->sb_start);
2258 sb->sb_csum = calc_sb_1_csum(sb);
2260 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2261 rdev->sb_page);
2262 } while (md_super_wait(rdev->mddev) < 0);
2273 if (new_offset >= rdev->data_offset)
2278 if (rdev->mddev->minor_version == 0)
2285 * beyond write-intent bitmap
2287 if (rdev->sb_start + (32+4)*2 > new_offset)
2289 bitmap = rdev->mddev->bitmap;
2290 if (bitmap && !rdev->mddev->bitmap_info.file &&
2291 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2292 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2294 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2311 .name = "md-1",
2323 if (mddev->sync_super) {
2324 mddev->sync_super(mddev, rdev);
2328 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2330 super_types[mddev->major_version].sync_super(mddev, rdev);
2339 if (test_bit(Faulty, &rdev->flags) ||
2340 test_bit(Journal, &rdev->flags) ||
2341 rdev->raid_disk == -1)
2344 if (test_bit(Faulty, &rdev2->flags) ||
2345 test_bit(Journal, &rdev2->flags) ||
2346 rdev2->raid_disk == -1)
2348 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2371 if (list_empty(&mddev->disks))
2373 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2376 /* skip spares and non-functional disks */
2377 if (test_bit(Faulty, &rdev->flags))
2379 if (rdev->raid_disk < 0)
2387 if (blk_integrity_compare(reference->bdev->bd_disk,
2388 rdev->bdev->bd_disk) < 0)
2389 return -EINVAL;
2391 if (!reference || !bdev_get_integrity(reference->bdev))
2397 blk_integrity_register(mddev->gendisk,
2398 bdev_get_integrity(reference->bdev));
2401 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2404 return -EINVAL;
2419 if (!mddev->gendisk)
2422 bi_mddev = blk_get_integrity(mddev->gendisk);
2427 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2429 mdname(mddev), bdevname(rdev->bdev, name));
2430 return -ENXIO;
2444 if (find_rdev(mddev, rdev->bdev->bd_dev))
2445 return -EEXIST;
2447 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2448 mddev->pers)
2449 return -EROFS;
2451 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2452 if (!test_bit(Journal, &rdev->flags) &&
2453 rdev->sectors &&
2454 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2455 if (mddev->pers) {
2457 * If mddev->level <= 0, then we don't care
2460 if (mddev->level > 0)
2461 return -ENOSPC;
2463 mddev->dev_sectors = rdev->sectors;
2466 /* Verify rdev->desc_nr is unique.
2467 * If it is -1, assign a free number, else
2471 if (rdev->desc_nr < 0) {
2473 if (mddev->pers)
2474 choice = mddev->raid_disks;
2477 rdev->desc_nr = choice;
2479 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2481 return -EBUSY;
2485 if (!test_bit(Journal, &rdev->flags) &&
2486 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2488 mdname(mddev), mddev->max_disks);
2489 return -EBUSY;
2491 bdevname(rdev->bdev,b);
2494 rdev->mddev = mddev;
2497 if (mddev->raid_disks)
2500 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2503 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2505 err = sysfs_create_link(&rdev->kobj, ko, "block");
2506 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2507 rdev->sysfs_unack_badblocks =
2508 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2509 rdev->sysfs_badblocks =
2510 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2512 list_add_rcu(&rdev->same_set, &mddev->disks);
2513 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2516 mddev->recovery_disabled++;
2521 pr_warn("md: failed to register dev-%s for %s\n",
2530 kobject_del(&rdev->kobj);
2531 kobject_put(&rdev->kobj);
2538 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2539 list_del_rcu(&rdev->same_set);
2540 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2541 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2542 rdev->mddev = NULL;
2543 sysfs_remove_link(&rdev->kobj, "block");
2544 sysfs_put(rdev->sysfs_state);
2545 sysfs_put(rdev->sysfs_unack_badblocks);
2546 sysfs_put(rdev->sysfs_badblocks);
2547 rdev->sysfs_state = NULL;
2548 rdev->sysfs_unack_badblocks = NULL;
2549 rdev->sysfs_badblocks = NULL;
2550 rdev->badblocks.count = 0;
2556 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2557 kobject_get(&rdev->kobj);
2558 queue_work(md_rdev_misc_wq, &rdev->del_work);
2574 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2578 rdev->bdev = bdev;
2584 struct block_device *bdev = rdev->bdev;
2585 rdev->bdev = NULL;
2595 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2598 if (test_bit(AutoDetected, &rdev->flags))
2599 md_autodetect_dev(rdev->bdev->bd_dev);
2602 kobject_put(&rdev->kobj);
2616 while (!list_empty(&mddev->disks)) {
2617 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2621 mddev->raid_disks = 0;
2622 mddev->major_version = 0;
2627 lockdep_assert_held(&mddev->lock);
2628 if (!mddev->in_sync) {
2629 mddev->sync_checkers++;
2630 spin_unlock(&mddev->lock);
2631 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2632 spin_lock(&mddev->lock);
2633 if (!mddev->in_sync &&
2634 percpu_ref_is_zero(&mddev->writes_pending)) {
2635 mddev->in_sync = 1;
2637 * Ensure ->in_sync is visible before we clear
2638 * ->sync_checkers.
2641 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2642 sysfs_notify_dirent_safe(mddev->sysfs_state);
2644 if (--mddev->sync_checkers == 0)
2645 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2647 if (mddev->safemode == 1)
2648 mddev->safemode = 0;
2649 return mddev->in_sync;
2654 /* Update each superblock (in-memory image), but
2662 if (rdev->sb_events == mddev->events ||
2664 rdev->raid_disk < 0 &&
2665 rdev->sb_events+1 == mddev->events)) {
2667 rdev->sb_loaded = 2;
2670 rdev->sb_loaded = 1;
2683 if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
2692 sb = page_address(rdev->sb_page);
2695 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2697 if (role == 0xffff && rdev->raid_disk >=0 &&
2698 !test_bit(Faulty, &rdev->flags))
2701 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2706 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2707 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2708 (mddev->layout != le32_to_cpu(sb->layout)) ||
2709 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2710 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2722 int ret = -1;
2724 if (mddev->ro) {
2726 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2732 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2734 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2736 ret = md_cluster_ops->metadata_update_start(mddev);
2740 md_cluster_ops->metadata_update_cancel(mddev);
2741 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2751 * During reshape/resync it might use array-addresses rather
2755 if (rdev->raid_disk >= 0 &&
2756 mddev->delta_disks >= 0 &&
2757 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2758 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2759 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2760 !test_bit(Journal, &rdev->flags) &&
2761 !test_bit(In_sync, &rdev->flags) &&
2762 mddev->curr_resync_completed > rdev->recovery_offset)
2763 rdev->recovery_offset = mddev->curr_resync_completed;
2766 if (!mddev->persistent) {
2767 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2768 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2769 if (!mddev->external) {
2770 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2772 if (rdev->badblocks.changed) {
2773 rdev->badblocks.changed = 0;
2774 ack_all_badblocks(&rdev->badblocks);
2777 clear_bit(Blocked, &rdev->flags);
2778 clear_bit(BlockedBadBlocks, &rdev->flags);
2779 wake_up(&rdev->blocked_wait);
2782 wake_up(&mddev->sb_wait);
2786 spin_lock(&mddev->lock);
2788 mddev->utime = ktime_get_real_seconds();
2790 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2792 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2793 /* just a clean<->dirty transition, possibly leave spares alone,
2800 if (mddev->degraded)
2804 * might have an event_count that still looks up-to-date,
2805 * so it can be re-added without a resync.
2812 sync_req = mddev->in_sync;
2814 /* If this is just a dirty<->clean transition, and the array is clean
2817 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2818 && mddev->can_decrease_events
2819 && mddev->events != 1) {
2820 mddev->events--;
2821 mddev->can_decrease_events = 0;
2824 mddev->events ++;
2825 mddev->can_decrease_events = nospares;
2829 * This 64-bit counter should never wrap.
2833 WARN_ON(mddev->events == 0);
2836 if (rdev->badblocks.changed)
2838 if (test_bit(Faulty, &rdev->flags))
2839 set_bit(FaultRecorded, &rdev->flags);
2843 spin_unlock(&mddev->lock);
2846 mdname(mddev), mddev->in_sync);
2848 if (mddev->queue)
2849 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2851 md_bitmap_update_sb(mddev->bitmap);
2855 if (rdev->sb_loaded != 1)
2858 if (!test_bit(Faulty, &rdev->flags)) {
2860 rdev->sb_start, rdev->sb_size,
2861 rdev->sb_page);
2863 bdevname(rdev->bdev, b),
2864 (unsigned long long)rdev->sb_start);
2865 rdev->sb_events = mddev->events;
2866 if (rdev->badblocks.size) {
2868 rdev->badblocks.sector,
2869 rdev->badblocks.size << 9,
2870 rdev->bb_page);
2871 rdev->badblocks.size = 0;
2876 bdevname(rdev->bdev, b));
2878 if (mddev->level == LEVEL_MULTIPATH)
2884 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2887 md_cluster_ops->metadata_update_finish(mddev);
2889 if (mddev->in_sync != sync_req ||
2890 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2894 wake_up(&mddev->sb_wait);
2895 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2896 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2899 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2900 clear_bit(Blocked, &rdev->flags);
2903 ack_all_badblocks(&rdev->badblocks);
2904 clear_bit(BlockedBadBlocks, &rdev->flags);
2905 wake_up(&rdev->blocked_wait);
2912 struct mddev *mddev = rdev->mddev;
2914 bool add_journal = test_bit(Journal, &rdev->flags);
2916 if (!mddev->pers->hot_remove_disk || add_journal) {
2921 super_types[mddev->major_version].
2925 err = mddev->pers->hot_add_disk(mddev, rdev);
2933 sysfs_notify_dirent_safe(rdev->sysfs_state);
2935 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2936 if (mddev->degraded)
2937 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2938 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2940 md_wakeup_thread(mddev->thread);
2975 unsigned long flags = READ_ONCE(rdev->flags);
2979 rdev->badblocks.unacked_exist))
2988 (rdev->badblocks.unacked_exist
3007 len -= strlen(sep);
3016 * faulty - simulates an error
3017 * remove - disconnects the device
3018 * writemostly - sets write_mostly
3019 * -writemostly - clears write_mostly
3020 * blocked - sets the Blocked flags
3021 * -blocked - clears the Blocked and possibly simulates an error
3022 * insync - sets Insync providing device isn't active
3023 * -insync - clear Insync for a device with a slot assigned,
3025 * write_error - sets WriteErrorSeen
3026 * -write_error - clears WriteErrorSeen
3027 * {,-}failfast - set/clear FailFast
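/*
 * Illustrative usage of the commands above, assuming member sda of md0:
 *   echo faulty       > /sys/block/md0/md/dev-sda/state
 *   echo -writemostly > /sys/block/md0/md/dev-sda/state
 */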
3030 struct mddev *mddev = rdev->mddev;
3031 int err = -EINVAL;
3034 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
3035 md_error(rdev->mddev, rdev);
3036 if (test_bit(Faulty, &rdev->flags))
3039 err = -EBUSY;
3041 if (rdev->mddev->pers) {
3042 clear_bit(Blocked, &rdev->flags);
3043 remove_and_add_spares(rdev->mddev, rdev);
3045 if (rdev->raid_disk >= 0)
3046 err = -EBUSY;
3050 err = md_cluster_ops->remove_disk(mddev, rdev);
3054 if (mddev->pers) {
3055 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3056 md_wakeup_thread(mddev->thread);
3062 set_bit(WriteMostly, &rdev->flags);
3063 mddev_create_serial_pool(rdev->mddev, rdev, false);
3066 } else if (cmd_match(buf, "-writemostly")) {
3067 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3068 clear_bit(WriteMostly, &rdev->flags);
3072 set_bit(Blocked, &rdev->flags);
3074 } else if (cmd_match(buf, "-blocked")) {
3075 if (!test_bit(Faulty, &rdev->flags) &&
3076 !test_bit(ExternalBbl, &rdev->flags) &&
3077 rdev->badblocks.unacked_exist) {
3081 md_error(rdev->mddev, rdev);
3083 clear_bit(Blocked, &rdev->flags);
3084 clear_bit(BlockedBadBlocks, &rdev->flags);
3085 wake_up(&rdev->blocked_wait);
3086 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3087 md_wakeup_thread(rdev->mddev->thread);
3090 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3091 set_bit(In_sync, &rdev->flags);
3094 set_bit(FailFast, &rdev->flags);
3097 } else if (cmd_match(buf, "-failfast")) {
3098 clear_bit(FailFast, &rdev->flags);
3101 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3102 !test_bit(Journal, &rdev->flags)) {
3103 if (rdev->mddev->pers == NULL) {
3104 clear_bit(In_sync, &rdev->flags);
3105 rdev->saved_raid_disk = rdev->raid_disk;
3106 rdev->raid_disk = -1;
3110 set_bit(WriteErrorSeen, &rdev->flags);
3112 } else if (cmd_match(buf, "-write_error")) {
3113 clear_bit(WriteErrorSeen, &rdev->flags);
3116 /* Any non-spare device that is not a replacement can
3120 if (rdev->raid_disk >= 0 &&
3121 !test_bit(Journal, &rdev->flags) &&
3122 !test_bit(Replacement, &rdev->flags))
3123 set_bit(WantReplacement, &rdev->flags);
3124 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3125 md_wakeup_thread(rdev->mddev->thread);
3127 } else if (cmd_match(buf, "-want_replacement")) {
3132 clear_bit(WantReplacement, &rdev->flags);
3138 if (rdev->mddev->pers)
3139 err = -EBUSY;
3141 set_bit(Replacement, &rdev->flags);
3144 } else if (cmd_match(buf, "-replacement")) {
3146 if (rdev->mddev->pers)
3147 err = -EBUSY;
3149 clear_bit(Replacement, &rdev->flags);
3152 } else if (cmd_match(buf, "re-add")) {
3153 if (!rdev->mddev->pers)
3154 err = -EINVAL;
3155 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3156 rdev->saved_raid_disk >= 0) {
3163 if (!mddev_is_clustered(rdev->mddev) ||
3164 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3165 clear_bit(Faulty, &rdev->flags);
3169 err = -EBUSY;
3170 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3171 set_bit(ExternalBbl, &rdev->flags);
3172 rdev->badblocks.shift = 0;
3174 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3175 clear_bit(ExternalBbl, &rdev->flags);
3181 sysfs_notify_dirent_safe(rdev->sysfs_state);
3190 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3202 atomic_set(&rdev->corrected_errors, n);
3211 if (test_bit(Journal, &rdev->flags))
3213 else if (rdev->raid_disk < 0)
3216 return sprintf(page, "%d\n", rdev->raid_disk);
3225 if (test_bit(Journal, &rdev->flags))
3226 return -EBUSY;
3228 slot = -1;
3235 return -ENOSPC;
3237 if (rdev->mddev->pers && slot == -1) {
3240 * with the personality with ->hot_*_disk.
3245 if (rdev->raid_disk == -1)
3246 return -EEXIST;
3248 if (rdev->mddev->pers->hot_remove_disk == NULL)
3249 return -EINVAL;
3250 clear_bit(Blocked, &rdev->flags);
3251 remove_and_add_spares(rdev->mddev, rdev);
3252 if (rdev->raid_disk >= 0)
3253 return -EBUSY;
3254 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3255 md_wakeup_thread(rdev->mddev->thread);
3256 } else if (rdev->mddev->pers) {
3262 if (rdev->raid_disk != -1)
3263 return -EBUSY;
3265 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3266 return -EBUSY;
3268 if (rdev->mddev->pers->hot_add_disk == NULL)
3269 return -EINVAL;
3271 if (slot >= rdev->mddev->raid_disks &&
3272 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3273 return -ENOSPC;
3275 rdev->raid_disk = slot;
3276 if (test_bit(In_sync, &rdev->flags))
3277 rdev->saved_raid_disk = slot;
3279 rdev->saved_raid_disk = -1;
3280 clear_bit(In_sync, &rdev->flags);
3281 clear_bit(Bitmap_sync, &rdev->flags);
3282 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3284 rdev->raid_disk = -1;
3287 sysfs_notify_dirent_safe(rdev->sysfs_state);
3289 sysfs_link_rdev(rdev->mddev, rdev);
3292 if (slot >= rdev->mddev->raid_disks &&
3293 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3294 return -ENOSPC;
3295 rdev->raid_disk = slot;
3297 clear_bit(Faulty, &rdev->flags);
3298 clear_bit(WriteMostly, &rdev->flags);
3299 set_bit(In_sync, &rdev->flags);
3300 sysfs_notify_dirent_safe(rdev->sysfs_state);
3311 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3319 return -EINVAL;
3320 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3321 return -EBUSY;
3322 if (rdev->sectors && rdev->mddev->external)
3325 return -EBUSY;
3326 rdev->data_offset = offset;
3327 rdev->new_data_offset = offset;
3337 (unsigned long long)rdev->new_data_offset);
3344 struct mddev *mddev = rdev->mddev;
3347 return -EINVAL;
3349 if (mddev->sync_thread ||
3350 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3351 return -EBUSY;
3352 if (new_offset == rdev->data_offset)
3355 else if (new_offset > rdev->data_offset) {
3357 if (new_offset - rdev->data_offset
3358 + mddev->dev_sectors > rdev->sectors)
3359 return -E2BIG;
3366 if (new_offset < rdev->data_offset &&
3367 mddev->reshape_backwards)
3368 return -EINVAL;
3373 if (new_offset > rdev->data_offset &&
3374 !mddev->reshape_backwards)
3375 return -EINVAL;
3377 if (mddev->pers && mddev->persistent &&
3378 !super_types[mddev->major_version]
3380 return -E2BIG;
3381 rdev->new_data_offset = new_offset;
3382 if (new_offset > rdev->data_offset)
3383 mddev->reshape_backwards = 1;
3384 else if (new_offset < rdev->data_offset)
3385 mddev->reshape_backwards = 0;
3395 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3414 return -EINVAL;
3416 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3417 return -EINVAL; /* sector conversion overflow */
3421 return -EINVAL; /* unsigned long long to sector_t overflow */
3430 struct mddev *my_mddev = rdev->mddev;
3431 sector_t oldsectors = rdev->sectors;
3434 if (test_bit(Journal, &rdev->flags))
3435 return -EBUSY;
3437 return -EINVAL;
3438 if (rdev->data_offset != rdev->new_data_offset)
3439 return -EINVAL; /* too confusing */
3440 if (my_mddev->pers && rdev->raid_disk >= 0) {
3441 if (my_mddev->persistent) {
3442 sectors = super_types[my_mddev->major_version].
3445 return -EBUSY;
3447 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3448 rdev->data_offset;
3449 if (!my_mddev->pers->resize)
3451 return -EINVAL;
3453 if (sectors < my_mddev->dev_sectors)
3454 return -EINVAL; /* component must fit device */
3456 rdev->sectors = sectors;
3457 if (sectors > oldsectors && my_mddev->external) {
3459 * ->bdev do not overlap. 'rcu' is sufficient to walk
3473 if (rdev->bdev == rdev2->bdev &&
3475 overlaps(rdev->data_offset, rdev->sectors,
3476 rdev2->data_offset,
3477 rdev2->sectors)) {
3494 rdev->sectors = oldsectors;
3495 return -EBUSY;
3506 unsigned long long recovery_start = rdev->recovery_offset;
3508 if (test_bit(In_sync, &rdev->flags) ||
3522 return -EINVAL;
3524 if (rdev->mddev->pers &&
3525 rdev->raid_disk >= 0)
3526 return -EBUSY;
3528 rdev->recovery_offset = recovery_start;
3530 set_bit(In_sync, &rdev->flags);
3532 clear_bit(In_sync, &rdev->flags);
3539 /* sysfs access to bad-blocks list.
3541 * 'bad-blocks' lists sector numbers and lengths of ranges that
3543 * the one-page limit of sysfs.
3546 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
3552 return badblocks_show(&rdev->badblocks, page, 0);
3556 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3558 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3559 wake_up(&rdev->blocked_wait);
3567 return badblocks_show(&rdev->badblocks, page, 1);
3571 return badblocks_store(&rdev->badblocks, page, len, 1);
3579 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3588 return -EINVAL;
3590 return -EINVAL;
3592 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3593 rdev->raid_disk >= 0)
3594 return -EBUSY;
3596 if (rdev->mddev->persistent) {
3597 if (rdev->mddev->major_version == 0)
3598 return -EINVAL;
3599 if ((sector > rdev->sb_start &&
3600 sector - rdev->sb_start > S16_MAX) ||
3601 (sector < rdev->sb_start &&
3602 rdev->sb_start - sector > -S16_MIN))
3603 return -EINVAL;
3604 rdev->ppl.offset = sector - rdev->sb_start;
3605 } else if (!rdev->mddev->external) {
3606 return -EBUSY;
3608 rdev->ppl.sector = sector;
3618 return sprintf(page, "%u\n", rdev->ppl.size);
3627 return -EINVAL;
3629 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3630 rdev->raid_disk >= 0)
3631 return -EBUSY;
3633 if (rdev->mddev->persistent) {
3634 if (rdev->mddev->major_version == 0)
3635 return -EINVAL;
3637 return -EINVAL;
3638 } else if (!rdev->mddev->external) {
3639 return -EBUSY;
3641 rdev->ppl.size = size;
3668 if (!entry->show)
3669 return -EIO;
3670 if (!rdev->mddev)
3671 return -ENODEV;
3672 return entry->show(rdev, page);
3682 struct mddev *mddev = rdev->mddev;
3684 if (!entry->store)
3685 return -EIO;
3687 return -EACCES;
3688 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3690 if (rdev->mddev == NULL)
3691 rv = -ENODEV;
3693 rv = entry->store(rdev, page, length);
3716 rdev->desc_nr = -1;
3717 rdev->saved_raid_disk = -1;
3718 rdev->raid_disk = -1;
3719 rdev->flags = 0;
3720 rdev->data_offset = 0;
3721 rdev->new_data_offset = 0;
3722 rdev->sb_events = 0;
3723 rdev->last_read_error = 0;
3724 rdev->sb_loaded = 0;
3725 rdev->bb_page = NULL;
3726 atomic_set(&rdev->nr_pending, 0);
3727 atomic_set(&rdev->read_errors, 0);
3728 atomic_set(&rdev->corrected_errors, 0);
3730 INIT_LIST_HEAD(&rdev->same_set);
3731 init_waitqueue_head(&rdev->blocked_wait);
3735 * be used - I wonder if that matters
3737 return badblocks_init(&rdev->badblocks, 0);
3745 * - the device is nonexistent (zero size)
3746 * - the device has no valid superblock
3748 * a faulty rdev _never_ has rdev->sb set.
3759 return ERR_PTR(-ENOMEM);
3768 err = lock_rdev(rdev, newdev, super_format == -2);
3772 kobject_init(&rdev->kobj, &rdev_ktype);
3774 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3777 bdevname(rdev->bdev,b));
3778 err = -EINVAL;
3785 if (err == -EINVAL) {
3787 bdevname(rdev->bdev,b),
3793 bdevname(rdev->bdev,b));
3801 if (rdev->bdev)
3820 switch (super_types[mddev->major_version].
3821 load_super(rdev, freshest, mddev->minor_version)) {
3828 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3829 bdevname(rdev->bdev,b));
3836 return -EINVAL;
3839 super_types[mddev->major_version].
3844 if (mddev->max_disks &&
3845 (rdev->desc_nr >= mddev->max_disks ||
3846 i > mddev->max_disks)) {
3848 mdname(mddev), bdevname(rdev->bdev, b),
3849 mddev->max_disks);
3854 if (super_types[mddev->major_version].
3856 pr_warn("md: kicking non-fresh %s from array!\n",
3857 bdevname(rdev->bdev,b));
3862 if (mddev->level == LEVEL_MULTIPATH) {
3863 rdev->desc_nr = i++;
3864 rdev->raid_disk = rdev->desc_nr;
3865 set_bit(In_sync, &rdev->flags);
3866 } else if (rdev->raid_disk >=
3867 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3868 !test_bit(Journal, &rdev->flags)) {
3869 rdev->raid_disk = -1;
3870 clear_bit(In_sync, &rdev->flags);
3877 /* Read a fixed-point number.
3885 * all without any floating-point arithmetic.
3890 long decimals = -1;
3896 value = *cp - '0';
3906 return -EINVAL;
3909 *res = result * int_pow(10, scale - decimals);
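/*
 * [Editor's sketch] A standalone rendering of the fixed-point parse
 * above: "1.25" with scale=3 yields 1250, using integer arithmetic
 * only. A simplified stand-in (no overflow checking), not the kernel
 * helper itself.
 */
#include <ctype.h>

static int parse_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long result = 0;
	long decimals = -1;			/* < 0 until a '.' is seen */

	while (isdigit((unsigned char)*cp) ||
	       (*cp == '.' && decimals < 0)) {
		if (*cp == '.') {
			decimals = 0;
		} else {
			result = result * 10 + (*cp - '0');
			if (decimals >= 0)
				decimals++;
		}
		cp++;
	}
	if (*cp == '\n')
		cp++;
	if (*cp || decimals > scale)
		return -1;	/* trailing junk, or more digits than 'scale' allows */
	if (decimals < 0)
		decimals = 0;
	while (decimals++ < scale)	/* pad so the result is value * 10^scale */
		result *= 10;
	*res = result;
	return 0;
}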
3916 unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
3927 return -EINVAL;
3931 return -EINVAL;
3933 mddev->safemode_delay = 0;
3935 unsigned long old_delay = mddev->safemode_delay;
3940 mddev->safemode_delay = new_delay;
3942 mod_timer(&mddev->safemode_timer, jiffies+1);
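/*
 * [Editor's example] Writing "0.200" parses (via the fixed-point
 * helper above) to msec = 200; assuming HZ=250 that stores
 * safemode_delay = 200*250/1000 = 50 jiffies, and reading back
 * reports 50*1000/250 = 200 ms. A nonzero delay that would round
 * down to 0 jiffies is bumped to 1 so safemode isn't silently
 * disabled.
 */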
3954 spin_lock(&mddev->lock);
3955 p = mddev->pers;
3957 ret = sprintf(page, "%s\n", p->name);
3958 else if (mddev->clevel[0])
3959 ret = sprintf(page, "%s\n", mddev->clevel);
3960 else if (mddev->level != LEVEL_NONE)
3961 ret = sprintf(page, "%d\n", mddev->level);
3964 spin_unlock(&mddev->lock);
3980 return -EINVAL;
3986 if (mddev->pers == NULL) {
3987 strncpy(mddev->clevel, buf, slen);
3988 if (mddev->clevel[slen-1] == '\n')
3989 slen--;
3990 mddev->clevel[slen] = 0;
3991 mddev->level = LEVEL_NONE;
3995 rv = -EROFS;
3996 if (mddev->ro)
4000 * - array is not engaged in resync/recovery/reshape
4001 * - old personality can be suspended
4002 * - new personality will access other array.
4005 rv = -EBUSY;
4006 if (mddev->sync_thread ||
4007 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4008 mddev->reshape_position != MaxSector ||
4009 mddev->sysfs_active)
4012 rv = -EINVAL;
4013 if (!mddev->pers->quiesce) {
4015 mdname(mddev), mddev->pers->name);
4021 if (clevel[slen-1] == '\n')
4022 slen--;
4027 if (request_module("md-%s", clevel) != 0)
4028 request_module("md-level-%s", clevel);
4031 if (!pers || !try_module_get(pers->owner)) {
4034 rv = -EINVAL;
4039 if (pers == mddev->pers) {
4041 module_put(pers->owner);
4045 if (!pers->takeover) {
4046 module_put(pers->owner);
4049 rv = -EINVAL;
4054 rdev->new_raid_disk = rdev->raid_disk;
4056 /* ->takeover must set new_* and/or delta_disks
4059 priv = pers->takeover(mddev);
4061 mddev->new_level = mddev->level;
4062 mddev->new_layout = mddev->layout;
4063 mddev->new_chunk_sectors = mddev->chunk_sectors;
4064 mddev->raid_disks -= mddev->delta_disks;
4065 mddev->delta_disks = 0;
4066 mddev->reshape_backwards = 0;
4067 module_put(pers->owner);
4078 spin_lock(&mddev->lock);
4079 oldpers = mddev->pers;
4080 oldpriv = mddev->private;
4081 mddev->pers = pers;
4082 mddev->private = priv;
4083 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4084 mddev->level = mddev->new_level;
4085 mddev->layout = mddev->new_layout;
4086 mddev->chunk_sectors = mddev->new_chunk_sectors;
4087 mddev->delta_disks = 0;
4088 mddev->reshape_backwards = 0;
4089 mddev->degraded = 0;
4090 spin_unlock(&mddev->lock);
4092 if (oldpers->sync_request == NULL &&
4093 mddev->external) {
4094 /* We are converting from a no-redundancy array
4098 * clean->dirty
4101 mddev->in_sync = 0;
4102 mddev->safemode_delay = 0;
4103 mddev->safemode = 0;
4106 oldpers->free(mddev, oldpriv);
4108 if (oldpers->sync_request == NULL &&
4109 pers->sync_request != NULL) {
4111 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4114 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4115 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4116 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4118 if (oldpers->sync_request != NULL &&
4119 pers->sync_request == NULL) {
4121 if (mddev->to_remove == NULL)
4122 mddev->to_remove = &md_redundancy_group;
4125 module_put(oldpers->owner);
4128 if (rdev->raid_disk < 0)
4130 if (rdev->new_raid_disk >= mddev->raid_disks)
4131 rdev->new_raid_disk = -1;
4132 if (rdev->new_raid_disk == rdev->raid_disk)
4137 if (rdev->raid_disk < 0)
4139 if (rdev->new_raid_disk == rdev->raid_disk)
4141 rdev->raid_disk = rdev->new_raid_disk;
4142 if (rdev->raid_disk < 0)
4143 clear_bit(In_sync, &rdev->flags);
4147 rdev->raid_disk, mdname(mddev));
4151 if (pers->sync_request == NULL) {
4155 mddev->in_sync = 1;
4156 del_timer_sync(&mddev->safemode_timer);
4158 blk_set_stacking_limits(&mddev->queue->limits);
4159 pers->run(mddev);
4160 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4162 if (!mddev->thread)
4164 sysfs_notify_dirent_safe(mddev->sysfs_level);
4179 if (mddev->reshape_position != MaxSector &&
4180 mddev->layout != mddev->new_layout)
4182 mddev->new_layout, mddev->layout);
4183 return sprintf(page, "%d\n", mddev->layout);
4199 if (mddev->pers) {
4200 if (mddev->pers->check_reshape == NULL)
4201 err = -EBUSY;
4202 else if (mddev->ro)
4203 err = -EROFS;
4205 mddev->new_layout = n;
4206 err = mddev->pers->check_reshape(mddev);
4208 mddev->new_layout = mddev->layout;
4211 mddev->new_layout = n;
4212 if (mddev->reshape_position == MaxSector)
4213 mddev->layout = n;
4224 if (mddev->raid_disks == 0)
4226 if (mddev->reshape_position != MaxSector &&
4227 mddev->delta_disks != 0)
4228 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4229 mddev->raid_disks - mddev->delta_disks);
4230 return sprintf(page, "%d\n", mddev->raid_disks);
4248 if (mddev->pers)
4250 else if (mddev->reshape_position != MaxSector) {
4252 int olddisks = mddev->raid_disks - mddev->delta_disks;
4254 err = -EINVAL;
4257 rdev->data_offset < rdev->new_data_offset)
4260 rdev->data_offset > rdev->new_data_offset)
4264 mddev->delta_disks = n - olddisks;
4265 mddev->raid_disks = n;
4266 mddev->reshape_backwards = (mddev->delta_disks < 0);
4268 mddev->raid_disks = n;
4279 return sprintf(page, "%pU\n", mddev->uuid);
4287 if (mddev->reshape_position != MaxSector &&
4288 mddev->chunk_sectors != mddev->new_chunk_sectors)
4290 mddev->new_chunk_sectors << 9,
4291 mddev->chunk_sectors << 9);
4292 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4308 if (mddev->pers) {
4309 if (mddev->pers->check_reshape == NULL)
4310 err = -EBUSY;
4311 else if (mddev->ro)
4312 err = -EROFS;
4314 mddev->new_chunk_sectors = n >> 9;
4315 err = mddev->pers->check_reshape(mddev);
4317 mddev->new_chunk_sectors = mddev->chunk_sectors;
4320 mddev->new_chunk_sectors = n >> 9;
4321 if (mddev->reshape_position == MaxSector)
4322 mddev->chunk_sectors = n >> 9;
4333 if (mddev->recovery_cp == MaxSector)
4335 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4351 return -EINVAL;
4357 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4358 err = -EBUSY;
4361 mddev->recovery_cp = n;
4362 if (mddev->pers)
4363 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4388 * read-auto
4391 * clean - no pending writes, but otherwise active.
4395 * if not known, block and switch to write-pending
4401 * write-pending
4404 * active-idle
4408 * RAID0/LINEAR-only: same as clean, but array is missing a member.
4409 * It's useful because RAID0/LINEAR mounted arrays aren't stopped
4416 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4417 "write-pending", "active-idle", "broken", NULL };
4433 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4434 switch(mddev->ro) {
4442 spin_lock(&mddev->lock);
4443 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4445 else if (mddev->in_sync)
4447 else if (mddev->safemode)
4451 spin_unlock(&mddev->lock);
4454 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4457 if (list_empty(&mddev->disks) &&
4458 mddev->raid_disks == 0 &&
4459 mddev->dev_sectors == 0)
4477 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4481 spin_lock(&mddev->lock);
4484 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4485 md_wakeup_thread(mddev->thread);
4486 wake_up(&mddev->sb_wait);
4490 err = -EBUSY;
4493 sysfs_notify_dirent_safe(mddev->sysfs_state);
4494 spin_unlock(&mddev->lock);
4500 err = -EINVAL;
4510 if (mddev->pers)
4518 if (mddev->pers)
4521 mddev->ro = 1;
4522 set_disk_ro(mddev->gendisk, 1);
4527 if (mddev->pers) {
4528 if (mddev->ro == 0)
4530 else if (mddev->ro == 1)
4533 mddev->ro = 2;
4534 set_disk_ro(mddev->gendisk, 0);
4537 mddev->ro = 2;
4542 if (mddev->pers) {
4546 spin_lock(&mddev->lock);
4548 err = -EBUSY;
4549 spin_unlock(&mddev->lock);
4551 err = -EINVAL;
4554 if (mddev->pers) {
4558 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4559 wake_up(&mddev->sb_wait);
4562 mddev->ro = 0;
4563 set_disk_ro(mddev->gendisk, 0);
4575 if (mddev->hold_active == UNTIL_IOCTL)
4576 mddev->hold_active = 0;
4577 sysfs_notify_dirent_safe(mddev->sysfs_state);
4588 atomic_read(&mddev->max_corr_read_errors));
4601 return -EINVAL;
4602 atomic_set(&mddev->max_corr_read_errors, n);
4613 return -EINVAL;
4623 if (work_pending(&rdev->del_work)) {
4648 return -EINVAL;
4651 return -EINVAL;
4655 return -EOVERFLOW;
4661 if (mddev->persistent) {
4662 rdev = md_import_device(dev, mddev->major_version,
4663 mddev->minor_version);
4664 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4666 = list_entry(mddev->disks.next,
4668 err = super_types[mddev->major_version]
4669 .load_super(rdev, rdev0, mddev->minor_version);
4673 } else if (mddev->external)
4674 rdev = md_import_device(dev, -2, -1);
4676 rdev = md_import_device(dev, -1, -1);
4705 if (!mddev->bitmap)
4707 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
4711 if (*end == '-') { /* range */
4717 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4720 md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
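/*
 * [Editor's sketch] The "<chunk>" / "<chunk>-<chunk>" parsing the
 * comment above describes, as a plain helper handling one token.
 * Simplified: no overflow or ordering checks.
 */
#include <stdlib.h>

static int parse_chunk_range(const char *buf,
			     unsigned long *first, unsigned long *last)
{
	char *end;

	*first = strtoul(buf, &end, 0);
	if (end == buf)
		return -1;		/* no digits at all */
	if (*end == '-')		/* range form "<chunk>-<chunk>" */
		*last = strtoul(end + 1, &end, 0);
	else				/* single-chunk form */
		*last = *first;
	return (*end == '\0' || *end == '\n' || *end == ' ') ? 0 : -1;
}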
4733 (unsigned long long)mddev->dev_sectors / 2);
4743 * If array is active, we can try an on-line resize
4753 if (mddev->pers) {
4758 if (mddev->dev_sectors == 0 ||
4759 mddev->dev_sectors > sectors)
4760 mddev->dev_sectors = sectors;
4762 err = -ENOSPC;
4780 if (mddev->persistent)
4782 mddev->major_version, mddev->minor_version);
4783 else if (mddev->external)
4784 return sprintf(page, "external:%s\n", mddev->metadata_type);
4803 err = -EBUSY;
4804 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4806 else if (!list_empty(&mddev->disks))
4811 mddev->persistent = 0;
4812 mddev->external = 0;
4813 mddev->major_version = 0;
4814 mddev->minor_version = 90;
4818 size_t namelen = len-9;
4819 if (namelen >= sizeof(mddev->metadata_type))
4820 namelen = sizeof(mddev->metadata_type)-1;
4821 strncpy(mddev->metadata_type, buf+9, namelen);
4822 mddev->metadata_type[namelen] = 0;
4823 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4824 mddev->metadata_type[--namelen] = 0;
4825 mddev->persistent = 0;
4826 mddev->external = 1;
4827 mddev->major_version = 0;
4828 mddev->minor_version = 90;
4832 err = -EINVAL;
4839 err = -ENOENT;
4842 mddev->major_version = major;
4843 mddev->minor_version = minor;
4844 mddev->persistent = 1;
4845 mddev->external = 0;
4859 unsigned long recovery = mddev->recovery;
4863 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4875 else if (mddev->reshape_position != MaxSector)
4884 if (!mddev->pers || !mddev->pers->sync_request)
4885 return -EINVAL;
4890 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4892 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4893 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4895 if (work_pending(&mddev->del_work))
4897 if (mddev->sync_thread) {
4898 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4903 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4904 return -EBUSY;
4906 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4908 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4909 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4912 if (mddev->pers->start_reshape == NULL)
4913 return -EINVAL;
4916 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4917 err = -EBUSY;
4918 } else if (mddev->reshape_position == MaxSector ||
4919 mddev->pers->check_reshape == NULL ||
4920 mddev->pers->check_reshape(mddev)) {
4921 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4922 err = mddev->pers->start_reshape(mddev);
4930 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4936 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4939 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4941 return -EINVAL;
4942 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4943 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4944 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4946 if (mddev->ro == 2) {
4948 * canceling read-auto mode
4950 mddev->ro = 0;
4951 md_wakeup_thread(mddev->sync_thread);
4953 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4954 md_wakeup_thread(mddev->thread);
4955 sysfs_notify_dirent_safe(mddev->sysfs_action);
4965 return sprintf(page, "%s\n", mddev->last_sync_action);
4975 atomic64_read(&mddev->resync_mismatches));
4984 mddev->sync_speed_min ? "local": "system");
5000 return -EINVAL;
5002 mddev->sync_speed_min = min;
5013 mddev->sync_speed_max ? "local": "system");
5029 return -EINVAL;
5031 mddev->sync_speed_max = max;
5041 return sprintf(page, "%d\n", mddev->degraded);
5048 return sprintf(page, "%d\n", mddev->parallel_resync);
5057 return -EINVAL;
5060 return -EINVAL;
5062 mddev->parallel_resync = n;
5064 if (mddev->sync_thread)
5079 if (mddev->curr_resync == 0)
5081 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5082 dt = (jiffies - mddev->resync_mark) / HZ;
5084 db = resync - mddev->resync_mark_cnt;
5095 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5098 if (mddev->curr_resync == 1 ||
5099 mddev->curr_resync == 2)
5102 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5103 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5104 max_sectors = mddev->resync_max_sectors;
5106 max_sectors = mddev->dev_sectors;
5108 resync = mddev->curr_resync_completed;
5119 (unsigned long long)mddev->resync_min);
5128 return -EINVAL;
5130 spin_lock(&mddev->lock);
5131 err = -EINVAL;
5132 if (min > mddev->resync_max)
5135 err = -EBUSY;
5136 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5140 mddev->resync_min = round_down(min, 8);
5144 spin_unlock(&mddev->lock);
5154 if (mddev->resync_max == MaxSector)
5158 (unsigned long long)mddev->resync_max);
5164 spin_lock(&mddev->lock);
5166 mddev->resync_max = MaxSector;
5171 err = -EINVAL;
5174 if (max < mddev->resync_min)
5177 err = -EBUSY;
5178 if (max < mddev->resync_max &&
5179 mddev->ro == 0 &&
5180 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5184 chunk = mddev->chunk_sectors;
5188 err = -EINVAL;
5192 mddev->resync_max = max;
5194 wake_up(&mddev->recovery_wait);
5197 spin_unlock(&mddev->lock);
5207 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5220 return -EINVAL;
5225 err = -EINVAL;
5226 if (mddev->pers == NULL ||
5227 mddev->pers->quiesce == NULL)
5230 mddev->suspend_lo = new;
5244 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5257 return -EINVAL;
5262 err = -EINVAL;
5263 if (mddev->pers == NULL)
5267 mddev->suspend_hi = new;
5281 if (mddev->reshape_position != MaxSector)
5283 (unsigned long long)mddev->reshape_position);
5299 return -EINVAL;
5303 err = -EBUSY;
5304 if (mddev->pers)
5306 mddev->reshape_position = new;
5307 mddev->delta_disks = 0;
5308 mddev->reshape_backwards = 0;
5309 mddev->new_level = mddev->level;
5310 mddev->new_layout = mddev->layout;
5311 mddev->new_chunk_sectors = mddev->chunk_sectors;
5313 rdev->new_data_offset = rdev->data_offset;
5328 mddev->reshape_backwards ? "backwards" : "forwards");
5342 return -EINVAL;
5343 if (mddev->reshape_backwards == backwards)
5350 if (mddev->delta_disks)
5351 err = -EBUSY;
5352 else if (mddev->persistent &&
5353 mddev->major_version == 0)
5354 err = -EINVAL;
5356 mddev->reshape_backwards = backwards;
5368 if (mddev->external_size)
5370 (unsigned long long)mddev->array_sectors/2);
5388 return -EINVAL;
5392 if (mddev->pers)
5393 sectors = mddev->pers->size(mddev, 0, 0);
5395 sectors = mddev->array_sectors;
5397 mddev->external_size = 0;
5400 err = -EINVAL;
5401 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5402 err = -E2BIG;
5404 mddev->external_size = 1;
5408 mddev->array_sectors = sectors;
5409 if (mddev->pers) {
5410 set_capacity(mddev->gendisk, mddev->array_sectors);
5411 revalidate_disk_size(mddev->gendisk, true);
5427 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5429 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5431 } else if (mddev->bitmap) {
5433 } else if (mddev->pers) {
5434 if (mddev->pers->sync_request)
5450 if (mddev->pers) {
5451 if (mddev->pers->change_consistency_policy)
5452 err = mddev->pers->change_consistency_policy(mddev, buf);
5454 err = -EBUSY;
5455 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5456 set_bit(MD_HAS_PPL, &mddev->flags);
5458 err = -EINVAL;
5470 return sprintf(page, "%d\n", mddev->fail_last_dev);
5487 if (value != mddev->fail_last_dev)
5488 mddev->fail_last_dev = value;
5498 if (mddev->pers == NULL || (mddev->pers->level != 1))
5501 return sprintf(page, "%d\n", mddev->serialize_policy);
5518 if (value == mddev->serialize_policy)
5524 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5526 err = -EINVAL;
5535 mddev->serialize_policy = value;
5598 if (!entry->show)
5599 return -EIO;
5601 if (list_empty(&mddev->all_mddevs)) {
5603 return -EBUSY;
5608 rv = entry->show(mddev, page);
5621 if (!entry->store)
5622 return -EIO;
5624 return -EACCES;
5626 if (list_empty(&mddev->all_mddevs)) {
5628 return -EBUSY;
5632 rv = entry->store(mddev, page, length);
5641 if (mddev->sysfs_state)
5642 sysfs_put(mddev->sysfs_state);
5643 if (mddev->sysfs_level)
5644 sysfs_put(mddev->sysfs_level);
5646 if (mddev->gendisk)
5647 del_gendisk(mddev->gendisk);
5648 if (mddev->queue)
5649 blk_cleanup_queue(mddev->queue);
5650 if (mddev->gendisk)
5651 put_disk(mddev->gendisk);
5652 percpu_ref_exit(&mddev->writes_pending);
5654 bioset_exit(&mddev->bio_set);
5655 bioset_exit(&mddev->sync_set);
5675 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5676 kobject_del(&mddev->kobj);
5677 kobject_put(&mddev->kobj);
5684 if (mddev->writes_pending.percpu_count_ptr)
5686 if (percpu_ref_init(&mddev->writes_pending, no_op,
5688 return -ENOMEM;
5690 percpu_ref_put(&mddev->writes_pending);
5700 * If dev is non-zero it must be a device number with a MAJOR of
5715 return -ENODEV;
5717 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5719 unit = MINOR(mddev->unit) >> shift;
5728 error = -EEXIST;
5729 if (mddev->gendisk)
5739 if (mddev2->gendisk &&
5740 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5750 mddev->hold_active = UNTIL_STOP;
5752 error = -ENOMEM;
5753 mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
5754 if (!mddev->queue)
5757 blk_set_stacking_limits(&mddev->queue->limits);
5761 blk_cleanup_queue(mddev->queue);
5762 mddev->queue = NULL;
5765 disk->major = MAJOR(mddev->unit);
5766 disk->first_minor = unit << shift;
5768 strcpy(disk->disk_name, name);
5770 sprintf(disk->disk_name, "md_d%d", unit);
5772 sprintf(disk->disk_name, "md%d", unit);
5773 disk->fops = &md_fops;
5774 disk->private_data = mddev;
5775 disk->queue = mddev->queue;
5776 blk_queue_write_cache(mddev->queue, true, true);
5781 disk->flags |= GENHD_FL_EXT_DEVT;
5782 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5783 mddev->gendisk = disk;
5786 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5791 pr_debug("md: cannot register %s/md - name in use\n",
5792 disk->disk_name);
5795 if (mddev->kobj.sd &&
5796 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5800 if (!error && mddev->kobj.sd) {
5801 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5802 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5803 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5829 while (len && val[len-1] == '\n')
5830 len--;
5832 return -E2BIG;
5842 return -EINVAL;
5849 mddev->safemode = 1;
5850 if (mddev->external)
5851 sysfs_notify_dirent_safe(mddev->sysfs_state);
5853 md_wakeup_thread(mddev->thread);
5864 if (list_empty(&mddev->disks))
5866 return -EINVAL;
5868 if (mddev->pers)
5869 return -EBUSY;
5871 if (mddev->sysfs_active)
5872 return -EBUSY;
5877 if (!mddev->raid_disks) {
5878 if (!mddev->persistent)
5879 return -EINVAL;
5882 return -EINVAL;
5885 if (mddev->level != LEVEL_NONE)
5886 request_module("md-level-%d", mddev->level);
5887 else if (mddev->clevel[0])
5888 request_module("md-%s", mddev->clevel);
5895 mddev->has_superblocks = false;
5897 if (test_bit(Faulty, &rdev->flags))
5899 sync_blockdev(rdev->bdev);
5900 invalidate_bdev(rdev->bdev);
5901 if (mddev->ro != 1 &&
5902 (bdev_read_only(rdev->bdev) ||
5903 bdev_read_only(rdev->meta_bdev))) {
5904 mddev->ro = 1;
5905 if (mddev->gendisk)
5906 set_disk_ro(mddev->gendisk, 1);
5909 if (rdev->sb_page)
5910 mddev->has_superblocks = true;
5916 if (rdev->meta_bdev) {
5918 } else if (rdev->data_offset < rdev->sb_start) {
5919 if (mddev->dev_sectors &&
5920 rdev->data_offset + mddev->dev_sectors
5921 > rdev->sb_start) {
5924 return -EINVAL;
5927 if (rdev->sb_start + rdev->sb_size/512
5928 > rdev->data_offset) {
5931 return -EINVAL;
5934 sysfs_notify_dirent_safe(rdev->sysfs_state);
5937 if (!bioset_initialized(&mddev->bio_set)) {
5938 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5942 if (!bioset_initialized(&mddev->sync_set)) {
5943 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5949 pers = find_pers(mddev->level, mddev->clevel);
5950 if (!pers || !try_module_get(pers->owner)) {
5952 if (mddev->level != LEVEL_NONE)
5954 mddev->level);
5957 mddev->clevel);
5958 err = -EINVAL;
5962 if (mddev->level != pers->level) {
5963 mddev->level = pers->level;
5964 mddev->new_level = pers->level;
5966 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5968 if (mddev->reshape_position != MaxSector &&
5969 pers->start_reshape == NULL) {
5971 module_put(pers->owner);
5972 err = -EINVAL;
5976 if (pers->sync_request) {
5987 rdev->bdev->bd_disk ==
5988 rdev2->bdev->bd_disk) {
5991 bdevname(rdev->bdev,b),
5992 bdevname(rdev2->bdev,b2));
5998 pr_warn("True protection against single-disk failure might be compromised.\n");
6001 mddev->recovery = 0;
6002 /* may be overridden by personality */
6003 mddev->resync_max_sectors = mddev->dev_sectors;
6005 mddev->ok_start_degraded = start_dirty_degraded;
6007 if (start_readonly && mddev->ro == 0)
6008 mddev->ro = 2; /* read-only, but switch on first write */
6010 err = pers->run(mddev);
6012 pr_warn("md: pers->run() failed ...\n");
6013 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
6014 WARN_ONCE(!mddev->external_size,
6018 (unsigned long long)mddev->array_sectors / 2,
6019 (unsigned long long)pers->size(mddev, 0, 0) / 2);
6020 err = -EINVAL;
6022 if (err == 0 && pers->sync_request &&
6023 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
6026 bitmap = md_bitmap_create(mddev, -1);
6032 mddev->bitmap = bitmap;
6038 if (mddev->bitmap_info.max_write_behind > 0) {
6042 if (test_bit(WriteMostly, &rdev->flags) &&
6046 if (create_pool && mddev->serial_info_pool == NULL) {
6047 mddev->serial_info_pool =
6050 if (!mddev->serial_info_pool) {
6051 err = -ENOMEM;
6057 if (mddev->queue) {
6061 if (rdev->raid_disk >= 0 &&
6062 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
6067 if (mddev->degraded)
6070 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6072 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6074 if (pers->sync_request) {
6075 if (mddev->kobj.sd &&
6076 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6079 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6080 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6081 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6082 } else if (mddev->ro == 2) /* auto-readonly not meaningful */
6083 mddev->ro = 0;
6085 atomic_set(&mddev->max_corr_read_errors,
6087 mddev->safemode = 0;
6089 mddev->safemode_delay = 0;
6091 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6092 mddev->in_sync = 1;
6094 spin_lock(&mddev->lock);
6095 mddev->pers = pers;
6096 spin_unlock(&mddev->lock);
6098 if (rdev->raid_disk >= 0)
6101 if (mddev->degraded && !mddev->ro)
6103 * via sysfs - until a lack of spares is confirmed.
6105 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6106 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6108 if (mddev->sb_flags)
6116 if (mddev->private)
6117 pers->free(mddev, mddev->private);
6118 mddev->private = NULL;
6119 module_put(pers->owner);
6122 bioset_exit(&mddev->bio_set);
6123 bioset_exit(&mddev->sync_set);
6132 set_bit(MD_NOT_READY, &mddev->flags);
6148 md_wakeup_thread(mddev->thread);
6149 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
6151 set_capacity(mddev->gendisk, mddev->array_sectors);
6152 revalidate_disk_size(mddev->gendisk, true);
6153 clear_bit(MD_NOT_READY, &mddev->flags);
6154 mddev->changed = 1;
6155 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6156 sysfs_notify_dirent_safe(mddev->sysfs_state);
6157 sysfs_notify_dirent_safe(mddev->sysfs_action);
6158 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6160 clear_bit(MD_NOT_READY, &mddev->flags);
6168 if (mddev->pers->start) {
6169 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6170 md_wakeup_thread(mddev->thread);
6171 ret = mddev->pers->start(mddev);
6172 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6173 md_wakeup_thread(mddev->sync_thread);
6181 struct gendisk *disk = mddev->gendisk;
6187 if (list_empty(&mddev->disks))
6188 return -ENXIO;
6189 if (!mddev->pers)
6190 return -EINVAL;
6191 if (!mddev->ro)
6192 return -EBUSY;
6196 if (test_bit(Journal, &rdev->flags) &&
6197 !test_bit(Faulty, &rdev->flags))
6199 if (bdev_read_only(rdev->bdev))
6203 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6205 return -EINVAL;
6207 return -EROFS;
6209 mddev->safemode = 0;
6210 mddev->ro = 0;
6212 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6214 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6215 md_wakeup_thread(mddev->thread);
6216 md_wakeup_thread(mddev->sync_thread);
6217 sysfs_notify_dirent_safe(mddev->sysfs_state);
6223 mddev->array_sectors = 0;
6224 mddev->external_size = 0;
6225 mddev->dev_sectors = 0;
6226 mddev->raid_disks = 0;
6227 mddev->recovery_cp = 0;
6228 mddev->resync_min = 0;
6229 mddev->resync_max = MaxSector;
6230 mddev->reshape_position = MaxSector;
6231 mddev->external = 0;
6232 mddev->persistent = 0;
6233 mddev->level = LEVEL_NONE;
6234 mddev->clevel[0] = 0;
6235 mddev->flags = 0;
6236 mddev->sb_flags = 0;
6237 mddev->ro = 0;
6238 mddev->metadata_type[0] = 0;
6239 mddev->chunk_sectors = 0;
6240 mddev->ctime = mddev->utime = 0;
6241 mddev->layout = 0;
6242 mddev->max_disks = 0;
6243 mddev->events = 0;
6244 mddev->can_decrease_events = 0;
6245 mddev->delta_disks = 0;
6246 mddev->reshape_backwards = 0;
6247 mddev->new_level = LEVEL_NONE;
6248 mddev->new_layout = 0;
6249 mddev->new_chunk_sectors = 0;
6250 mddev->curr_resync = 0;
6251 atomic64_set(&mddev->resync_mismatches, 0);
6252 mddev->suspend_lo = mddev->suspend_hi = 0;
6253 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6254 mddev->recovery = 0;
6255 mddev->in_sync = 0;
6256 mddev->changed = 0;
6257 mddev->degraded = 0;
6258 mddev->safemode = 0;
6259 mddev->private = NULL;
6260 mddev->cluster_info = NULL;
6261 mddev->bitmap_info.offset = 0;
6262 mddev->bitmap_info.default_offset = 0;
6263 mddev->bitmap_info.default_space = 0;
6264 mddev->bitmap_info.chunksize = 0;
6265 mddev->bitmap_info.daemon_sleep = 0;
6266 mddev->bitmap_info.max_write_behind = 0;
6267 mddev->bitmap_info.nodes = 0;
6272 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6273 if (work_pending(&mddev->del_work))
6275 if (mddev->sync_thread) {
6276 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6280 del_timer_sync(&mddev->safemode_timer);
6282 if (mddev->pers && mddev->pers->quiesce) {
6283 mddev->pers->quiesce(mddev, 1);
6284 mddev->pers->quiesce(mddev, 0);
6288 if (mddev->ro == 0 &&
6289 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6290 mddev->sb_flags)) {
6293 mddev->in_sync = 1;
6297 mddev->serialize_policy = 0;
6312 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6313 mddev->pers->quiesce(mddev, 1);
6314 mddev->pers->quiesce(mddev, 0);
6316 md_unregister_thread(&mddev->thread);
6317 if (mddev->queue)
6318 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
6323 struct md_personality *pers = mddev->pers;
6326 /* Ensure ->event_work is done */
6327 if (mddev->event_work.func)
6329 spin_lock(&mddev->lock);
6330 mddev->pers = NULL;
6331 spin_unlock(&mddev->lock);
6332 pers->free(mddev, mddev->private);
6333 mddev->private = NULL;
6334 if (pers->sync_request && mddev->to_remove == NULL)
6335 mddev->to_remove = &md_redundancy_group;
6336 module_put(pers->owner);
6337 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6342 lockdep_assert_held(&mddev->reconfig_mutex);
6345 * This is called from dm-raid
6349 bioset_exit(&mddev->bio_set);
6350 bioset_exit(&mddev->sync_set);
6360 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6362 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6363 md_wakeup_thread(mddev->thread);
6365 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6366 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6367 if (mddev->sync_thread)
6370 wake_up_process(mddev->sync_thread->tsk);
6372 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6373 return -EBUSY;
6376 &mddev->recovery));
6377 wait_event(mddev->sb_wait,
6378 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6381 mutex_lock(&mddev->open_mutex);
6382 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6383 mddev->sync_thread ||
6384 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6387 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6388 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6389 md_wakeup_thread(mddev->thread);
6391 err = -EBUSY;
6394 if (mddev->pers) {
6397 err = -ENXIO;
6398 if (mddev->ro==1)
6400 mddev->ro = 1;
6401 set_disk_ro(mddev->gendisk, 1);
6402 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6403 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6404 md_wakeup_thread(mddev->thread);
6405 sysfs_notify_dirent_safe(mddev->sysfs_state);
6409 mutex_unlock(&mddev->open_mutex);
6414 * 0 - completely stop and disassemble array
6415 * 2 - stop but do not disassemble array
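/*
 * [Editor's sketch] Mode 0 is what the STOP_ARRAY ioctl requests;
 * mode 2 (stop without disassembling) is reached internally, e.g.
 * when "inactive" is written to array_state. A minimal example; the
 * /dev/md0 name is an assumption.
 */
#include <fcntl.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/raid/md_u.h>	/* STOP_ARRAY */

int main(void)
{
	int fd = open("/dev/md0", O_RDONLY);

	if (fd < 0)
		return 1;
	/* stop and disassemble; fails -EBUSY if anyone else holds it open */
	if (ioctl(fd, STOP_ARRAY, NULL) < 0)
		return 1;
	close(fd);
	return 0;
}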
6420 struct gendisk *disk = mddev->gendisk;
6424 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6426 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6427 md_wakeup_thread(mddev->thread);
6429 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6430 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6431 if (mddev->sync_thread)
6434 wake_up_process(mddev->sync_thread->tsk);
6437 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6439 &mddev->recovery)));
6442 mutex_lock(&mddev->open_mutex);
6443 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6444 mddev->sysfs_active ||
6445 mddev->sync_thread ||
6446 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6448 mutex_unlock(&mddev->open_mutex);
6450 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6451 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6452 md_wakeup_thread(mddev->thread);
6454 return -EBUSY;
6456 if (mddev->pers) {
6457 if (mddev->ro)
6464 sysfs_notify_dirent_safe(mddev->sysfs_state);
6467 if (rdev->raid_disk >= 0)
6471 mutex_unlock(&mddev->open_mutex);
6472 mddev->changed = 1;
6475 if (mddev->ro)
6476 mddev->ro = 0;
6478 mutex_unlock(&mddev->open_mutex);
6485 if (mddev->bitmap_info.file) {
6486 struct file *f = mddev->bitmap_info.file;
6487 spin_lock(&mddev->lock);
6488 mddev->bitmap_info.file = NULL;
6489 spin_unlock(&mddev->lock);
6492 mddev->bitmap_info.offset = 0;
6497 if (mddev->hold_active == UNTIL_STOP)
6498 mddev->hold_active = 0;
6501 sysfs_notify_dirent_safe(mddev->sysfs_state);
6511 if (list_empty(&mddev->disks))
6518 pr_cont("<%s>", bdevname(rdev->bdev,b));
6555 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6560 bdevname(rdev->bdev,b));
6561 list_move(&rdev->same_set, &candidates);
6570 rdev0->preferred_minor << MdpMinorShift);
6573 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6576 if (rdev0->preferred_minor != unit) {
6578 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6589 else if (mddev->raid_disks || mddev->major_version
6590 || !list_empty(&mddev->disks)) {
6592 mdname(mddev), bdevname(rdev0->bdev,b));
6596 mddev->persistent = 1;
6598 list_del_init(&rdev->same_set);
6609 list_del_init(&rdev->same_set);
6627 return -EFAULT;
6642 if (test_bit(Faulty, &rdev->flags))
6646 if (test_bit(In_sync, &rdev->flags))
6648 else if (test_bit(Journal, &rdev->flags))
6657 info.major_version = mddev->major_version;
6658 info.minor_version = mddev->minor_version;
6660 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6661 info.level = mddev->level;
6662 info.size = mddev->dev_sectors / 2;
6663 if (info.size != mddev->dev_sectors / 2) /* overflow */
6664 info.size = -1;
6666 info.raid_disks = mddev->raid_disks;
6667 info.md_minor = mddev->md_minor;
6668 info.not_persistent= !mddev->persistent;
6670 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6672 if (mddev->in_sync)
6674 if (mddev->bitmap && mddev->bitmap_info.offset)
6683 info.layout = mddev->layout;
6684 info.chunk_size = mddev->chunk_sectors << 9;
6687 return -EFAULT;
6700 return -ENOMEM;
6703 spin_lock(&mddev->lock);
6705 if (mddev->bitmap_info.file) {
6706 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6707 sizeof(file->pathname));
6711 memmove(file->pathname, ptr,
6712 sizeof(file->pathname)-(ptr-file->pathname));
6714 spin_unlock(&mddev->lock);
6718 err = -EFAULT;
6730 return -EFAULT;
6735 info.major = MAJOR(rdev->bdev->bd_dev);
6736 info.minor = MINOR(rdev->bdev->bd_dev);
6737 info.raid_disk = rdev->raid_disk;
6739 if (test_bit(Faulty, &rdev->flags))
6741 else if (test_bit(In_sync, &rdev->flags)) {
6745 if (test_bit(Journal, &rdev->flags))
6747 if (test_bit(WriteMostly, &rdev->flags))
6749 if (test_bit(FailFast, &rdev->flags))
6753 info.raid_disk = -1;
6759 return -EFAULT;
6768 dev_t dev = MKDEV(info->major,info->minor);
6771 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6774 return -EINVAL;
6777 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6778 return -EOVERFLOW;
6780 if (!mddev->raid_disks) {
6783 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6789 if (!list_empty(&mddev->disks)) {
6791 = list_entry(mddev->disks.next,
6793 err = super_types[mddev->major_version]
6794 .load_super(rdev, rdev0, mddev->minor_version);
6797 bdevname(rdev->bdev,b),
6798 bdevname(rdev0->bdev,b2));
6800 return -EINVAL;
6814 if (mddev->pers) {
6816 if (!mddev->pers->hot_add_disk) {
6819 return -EINVAL;
6821 if (mddev->persistent)
6822 rdev = md_import_device(dev, mddev->major_version,
6823 mddev->minor_version);
6825 rdev = md_import_device(dev, -1, -1);
6832 if (!mddev->persistent) {
6833 if (info->state & (1<<MD_DISK_SYNC) &&
6834 info->raid_disk < mddev->raid_disks) {
6835 rdev->raid_disk = info->raid_disk;
6836 set_bit(In_sync, &rdev->flags);
6837 clear_bit(Bitmap_sync, &rdev->flags);
6839 rdev->raid_disk = -1;
6840 rdev->saved_raid_disk = rdev->raid_disk;
6842 super_types[mddev->major_version].
6844 if ((info->state & (1<<MD_DISK_SYNC)) &&
6845 rdev->raid_disk != info->raid_disk) {
6846 /* This was a hot-add request, but the event count doesn't
6850 return -EINVAL;
6853 clear_bit(In_sync, &rdev->flags); /* just to be sure */
6854 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6855 set_bit(WriteMostly, &rdev->flags);
6857 clear_bit(WriteMostly, &rdev->flags);
6858 if (info->state & (1<<MD_DISK_FAILFAST))
6859 set_bit(FailFast, &rdev->flags);
6861 clear_bit(FailFast, &rdev->flags);
6863 if (info->state & (1<<MD_DISK_JOURNAL)) {
6869 if (test_bit(Journal, &rdev2->flags)) {
6874 if (has_journal || mddev->bitmap) {
6876 return -EBUSY;
6878 set_bit(Journal, &rdev->flags);
6884 if (info->state & (1 << MD_DISK_CANDIDATE))
6885 set_bit(Candidate, &rdev->flags);
6886 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6887 /* --add initiated by this node */
6888 err = md_cluster_ops->add_new_disk(mddev, rdev);
6896 rdev->raid_disk = -1;
6903 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6905 err = md_cluster_ops->new_disk_ack(mddev,
6912 md_cluster_ops->add_new_disk_cancel(mddev);
6926 if (mddev->major_version != 0) {
6928 return -EINVAL;
6931 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6933 rdev = md_import_device(dev, -1, 0);
6939 rdev->desc_nr = info->number;
6940 if (info->raid_disk < mddev->raid_disks)
6941 rdev->raid_disk = info->raid_disk;
6943 rdev->raid_disk = -1;
6945 if (rdev->raid_disk < mddev->raid_disks)
6946 if (info->state & (1<<MD_DISK_SYNC))
6947 set_bit(In_sync, &rdev->flags);
6949 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6950 set_bit(WriteMostly, &rdev->flags);
6951 if (info->state & (1<<MD_DISK_FAILFAST))
6952 set_bit(FailFast, &rdev->flags);
6954 if (!mddev->persistent) {
6956 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6958 rdev->sb_start = calc_dev_sboffset(rdev);
6959 rdev->sectors = rdev->sb_start;
6976 if (!mddev->pers)
6977 return -ENODEV;
6981 return -ENXIO;
6983 if (rdev->raid_disk < 0)
6986 clear_bit(Blocked, &rdev->flags);
6989 if (rdev->raid_disk >= 0)
6994 if (md_cluster_ops->remove_disk(mddev, rdev))
6999 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7000 if (mddev->thread)
7001 md_wakeup_thread(mddev->thread);
7009 bdevname(rdev->bdev,b), mdname(mddev));
7010 return -EBUSY;
7019 if (!mddev->pers)
7020 return -ENODEV;
7022 if (mddev->major_version != 0) {
7023 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
7025 return -EINVAL;
7027 if (!mddev->pers->hot_add_disk) {
7030 return -EINVAL;
7033 rdev = md_import_device(dev, -1, 0);
7037 return -EINVAL;
7040 if (mddev->persistent)
7041 rdev->sb_start = calc_dev_sboffset(rdev);
7043 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
7045 rdev->sectors = rdev->sb_start;
7047 if (test_bit(Faulty, &rdev->flags)) {
7048 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
7049 bdevname(rdev->bdev,b), mdname(mddev));
7050 err = -EINVAL;
7054 clear_bit(In_sync, &rdev->flags);
7055 rdev->desc_nr = -1;
7056 rdev->saved_raid_disk = -1;
7066 rdev->raid_disk = -1;
7068 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7069 if (!mddev->thread)
7075 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7076 md_wakeup_thread(mddev->thread);
7089 if (mddev->pers) {
7090 if (!mddev->pers->quiesce || !mddev->thread)
7091 return -EBUSY;
7092 if (mddev->recovery || mddev->sync_thread)
7093 return -EBUSY;
7101 if (mddev->bitmap || mddev->bitmap_info.file)
7102 return -EEXIST; /* cannot add when bitmap is present */
7108 return -EBADF;
7111 inode = f->f_mapping->host;
7112 if (!S_ISREG(inode->i_mode)) {
7115 err = -EBADF;
7116 } else if (!(f->f_mode & FMODE_WRITE)) {
7119 err = -EBADF;
7120 } else if (atomic_read(&inode->i_writecount) != 1) {
7123 err = -EBUSY;
7129 mddev->bitmap_info.file = f;
7130 mddev->bitmap_info.offset = 0; /* file overrides offset */
7131 } else if (mddev->bitmap == NULL)
7132 return -ENOENT; /* cannot remove what isn't there */
7134 if (mddev->pers) {
7138 bitmap = md_bitmap_create(mddev, -1);
7141 mddev->bitmap = bitmap;
7147 fd = -1;
7157 struct file *f = mddev->bitmap_info.file;
7159 spin_lock(&mddev->lock);
7160 mddev->bitmap_info.file = NULL;
7161 spin_unlock(&mddev->lock);
7175 * This will always create an array with a type-0.90.0 superblock.
7178 * use to determine which style super-blocks are to be found on the devices.
7184 if (info->raid_disks == 0) {
7186 if (info->major_version < 0 ||
7187 info->major_version >= ARRAY_SIZE(super_types) ||
7188 super_types[info->major_version].name == NULL) {
7189 /* maybe try to auto-load a module? */
7191 info->major_version);
7192 return -EINVAL;
7194 mddev->major_version = info->major_version;
7195 mddev->minor_version = info->minor_version;
7196 mddev->patch_version = info->patch_version;
7197 mddev->persistent = !info->not_persistent;
7201 mddev->ctime = ktime_get_real_seconds();
7204 mddev->major_version = MD_MAJOR_VERSION;
7205 mddev->minor_version = MD_MINOR_VERSION;
7206 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7207 mddev->ctime = ktime_get_real_seconds();
7209 mddev->level = info->level;
7210 mddev->clevel[0] = 0;
7211 mddev->dev_sectors = 2 * (sector_t)info->size;
7212 mddev->raid_disks = info->raid_disks;
7216 if (info->state & (1<<MD_SB_CLEAN))
7217 mddev->recovery_cp = MaxSector;
7219 mddev->recovery_cp = 0;
7220 mddev->persistent = ! info->not_persistent;
7221 mddev->external = 0;
7223 mddev->layout = info->layout;
7224 if (mddev->level == 0)
7226 mddev->layout = -1;
7227 mddev->chunk_sectors = info->chunk_size >> 9;
7229 if (mddev->persistent) {
7230 mddev->max_disks = MD_SB_DISKS;
7231 mddev->flags = 0;
7232 mddev->sb_flags = 0;
7234 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7236 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7237 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7238 mddev->bitmap_info.offset = 0;
7240 mddev->reshape_position = MaxSector;
7245 get_random_bytes(mddev->uuid, 16);
7247 mddev->new_level = mddev->level;
7248 mddev->new_chunk_sectors = mddev->chunk_sectors;
7249 mddev->new_layout = mddev->layout;
7250 mddev->delta_disks = 0;
7251 mddev->reshape_backwards = 0;
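/*
 * [Editor's sketch] Creating an array the ioctl way, per the
 * set_array_info comment above: with raid_disks > 0 the kernel fills
 * in the version fields itself and always produces a 0.90.0
 * superblock. /dev/md0 and the shape values are assumptions.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/raid/md_u.h>	/* mdu_array_info_t, SET_ARRAY_INFO */

int main(void)
{
	mdu_array_info_t info;
	int fd = open("/dev/md0", O_RDWR);

	if (fd < 0)
		return 1;
	memset(&info, 0, sizeof(info));
	info.level = 1;			/* RAID1 */
	info.raid_disks = 2;		/* > 0: "create", not "assemble" */
	info.size = 1024 * 1024;	/* per-device size in KiB */
	ioctl(fd, SET_ARRAY_INFO, &info);
	/* ...followed by ADD_NEW_DISK for each member, then RUN_ARRAY */
	close(fd);
	return 0;
}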
7258 lockdep_assert_held(&mddev->reconfig_mutex);
7260 if (mddev->external_size)
7263 mddev->array_sectors = array_sectors;
7272 sector_t old_dev_sectors = mddev->dev_sectors;
7274 if (mddev->pers->resize == NULL)
7275 return -EINVAL;
7285 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7286 mddev->sync_thread)
7287 return -EBUSY;
7288 if (mddev->ro)
7289 return -EROFS;
7292 sector_t avail = rdev->sectors;
7297 return -ENOSPC;
7299 rv = mddev->pers->resize(mddev, num_sectors);
7302 md_cluster_ops->update_size(mddev, old_dev_sectors);
7303 else if (mddev->queue) {
7304 set_capacity(mddev->gendisk, mddev->array_sectors);
7305 revalidate_disk_size(mddev->gendisk, true);
7316 if (mddev->pers->check_reshape == NULL)
7317 return -EINVAL;
7318 if (mddev->ro)
7319 return -EROFS;
7321 (mddev->max_disks && raid_disks >= mddev->max_disks))
7322 return -EINVAL;
7323 if (mddev->sync_thread ||
7324 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7325 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7326 mddev->reshape_position != MaxSector)
7327 return -EBUSY;
7330 if (mddev->raid_disks < raid_disks &&
7331 rdev->data_offset < rdev->new_data_offset)
7332 return -EINVAL;
7333 if (mddev->raid_disks > raid_disks &&
7334 rdev->data_offset > rdev->new_data_offset)
7335 return -EINVAL;
7338 mddev->delta_disks = raid_disks - mddev->raid_disks;
7339 if (mddev->delta_disks < 0)
7340 mddev->reshape_backwards = 1;
7341 else if (mddev->delta_disks > 0)
7342 mddev->reshape_backwards = 0;
7344 rv = mddev->pers->check_reshape(mddev);
7346 mddev->delta_disks = 0;
7347 mddev->reshape_backwards = 0;
7354 * on-line array.
7367 if (mddev->bitmap && mddev->bitmap_info.offset)
7370 if (mddev->major_version != info->major_version ||
7371 mddev->minor_version != info->minor_version ||
7372 /* mddev->patch_version != info->patch_version || */
7373 mddev->ctime != info->ctime ||
7374 mddev->level != info->level ||
7375 /* mddev->layout != info->layout || */
7376 mddev->persistent != !info->not_persistent ||
7377 mddev->chunk_sectors != info->chunk_size >> 9 ||
7379 ((state^info->state) & 0xfffffe00)
7381 return -EINVAL;
7383 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7385 if (mddev->raid_disks != info->raid_disks)
7387 if (mddev->layout != info->layout)
7389 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7394 return -EINVAL;
7396 if (mddev->layout != info->layout) {
7401 if (mddev->pers->check_reshape == NULL)
7402 return -EINVAL;
7404 mddev->new_layout = info->layout;
7405 rv = mddev->pers->check_reshape(mddev);
7407 mddev->new_layout = mddev->layout;
7411 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7412 rv = update_size(mddev, (sector_t)info->size * 2);
7414 if (mddev->raid_disks != info->raid_disks)
7415 rv = update_raid_disks(mddev, info->raid_disks);
7417 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7418 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7419 rv = -EINVAL;
7422 if (mddev->recovery || mddev->sync_thread) {
7423 rv = -EBUSY;
7426 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7429 if (mddev->bitmap) {
7430 rv = -EEXIST;
7433 if (mddev->bitmap_info.default_offset == 0) {
7434 rv = -EINVAL;
7437 mddev->bitmap_info.offset =
7438 mddev->bitmap_info.default_offset;
7439 mddev->bitmap_info.space =
7440 mddev->bitmap_info.default_space;
7441 bitmap = md_bitmap_create(mddev, -1);
7444 mddev->bitmap = bitmap;
7453 if (!mddev->bitmap) {
7454 rv = -ENOENT;
7457 if (mddev->bitmap->storage.file) {
7458 rv = -EINVAL;
7461 if (mddev->bitmap_info.nodes) {
7463 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7465 rv = -EPERM;
7466 md_cluster_ops->unlock_all_bitmaps(mddev);
7470 mddev->bitmap_info.nodes = 0;
7471 md_cluster_ops->leave(mddev);
7473 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7478 mddev->bitmap_info.offset = 0;
7492 if (mddev->pers == NULL)
7493 return -ENODEV;
7498 err = -ENODEV;
7501 if (!test_bit(Faulty, &rdev->flags))
7502 err = -EBUSY;
7512 * dosfs just mad... ;-)
7516 struct mddev *mddev = bdev->bd_disk->private_data;
7518 geo->heads = 2;
7519 geo->sectors = 4;
7520 geo->cylinders = mddev->array_sectors / 8;
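/*
 * [Editor's example] With heads=2 and sectors=4, each fake "cylinder"
 * is 2*4 = 8 sectors, hence cylinders = array_sectors / 8: a 1 TiB
 * array (2147483648 512-byte sectors) reports 268435456 cylinders.
 */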
7559 return -ENOTTY;
7568 return -EACCES;
7586 mddev = bdev->bd_disk->private_data;
7596 if (!mddev->raid_disks && !mddev->external)
7597 err = -ENODEV;
7603 if (!mddev->raid_disks && !mddev->external)
7604 err = -ENODEV;
7624 wait_event_interruptible_timeout(mddev->sb_wait,
7626 &mddev->recovery),
7629 /* Need to flush page cache, and ensure no-one else opens
7632 mutex_lock(&mddev->open_mutex);
7633 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7634 mutex_unlock(&mddev->open_mutex);
7635 err = -EBUSY;
7638 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7639 mutex_unlock(&mddev->open_mutex);
7640 err = -EBUSY;
7644 mutex_unlock(&mddev->open_mutex);
7659 err = -EFAULT;
7662 if (mddev->pers) {
7670 if (!list_empty(&mddev->disks)) {
7672 err = -EBUSY;
7675 if (mddev->raid_disks) {
7677 err = -EBUSY;
7693 if ((!mddev->raid_disks && !mddev->external)
7697 err = -ENODEV;
7702 * Commands even a read-only array can execute:
7722 /* We can support ADD_NEW_DISK on read-only arrays
7723 * only if we are re-adding a preexisting device.
7724 * So require mddev->pers and MD_DISK_SYNC.
7726 if (mddev->pers) {
7729 err = -EFAULT;
7731 /* Need to clear read-only for this */
7741 err = -EFAULT;
7744 err = -EINVAL;
7746 /* if the bdev is going readonly the value of mddev->ro
7753 if (mddev->ro != 1)
7759 if (mddev->pers) {
7762 mddev->ro = 2;
7763 set_disk_ro(mddev->gendisk, 0);
7771 * superblock, so we do not allow them on read-only arrays.
7773 if (mddev->ro && mddev->pers) {
7774 if (mddev->ro == 2) {
7775 mddev->ro = 0;
7776 sysfs_notify_dirent_safe(mddev->sysfs_state);
7777 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7779 /* If a device failed while we were read-only, we
7782 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7784 wait_event(mddev->sb_wait,
7785 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7786 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7790 err = -EROFS;
7800 err = -EFAULT;
7808 md_cluster_ops->new_disk_ack(mddev, false);
7810 err = -EINVAL;
7826 err = -EINVAL;
7831 if (mddev->hold_active == UNTIL_IOCTL &&
7832 err != -EINVAL)
7833 mddev->hold_active = 0;
7837 clear_bit(MD_CLOSING, &mddev->flags);
7866 struct mddev *mddev = mddev_find(bdev->bd_dev);
7870 return -ENODEV;
7872 if (mddev->gendisk != bdev->bd_disk) {
7877 /* Wait until bdev->bd_disk is definitely gone */
7878 if (work_pending(&mddev->del_work))
7880 return -EBUSY;
7882 BUG_ON(mddev != bdev->bd_disk->private_data);
7884 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7887 if (test_bit(MD_CLOSING, &mddev->flags)) {
7888 mutex_unlock(&mddev->open_mutex);
7889 err = -ENODEV;
7894 atomic_inc(&mddev->openers);
7895 mutex_unlock(&mddev->open_mutex);
7906 struct mddev *mddev = disk->private_data;
7909 atomic_dec(&mddev->openers);
7915 struct mddev *mddev = disk->private_data;
7918 if (mddev->changed)
7920 mddev->changed = 0;
7943 * md_thread is a 'system-thread', its priority should be very
7958 * we don't add to the load-average.
7966 (thread->wqueue,
7967 test_bit(THREAD_WAKEUP, &thread->flags)
7969 thread->timeout);
7971 clear_bit(THREAD_WAKEUP, &thread->flags);
7975 thread->run(thread);
7984 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7985 set_bit(THREAD_WAKEUP, &thread->flags);
7986 wake_up(&thread->wqueue);
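/*
 * [Editor's sketch] The wake-flag pattern used by md_thread and
 * md_wakeup_thread above, transplanted to plain pthreads so it can be
 * read in isolation: the worker sleeps until a wakeup bit is set (the
 * kernel adds a timeout), clears it, then calls its run hook. Names
 * are illustrative; mutex/cond initialization is omitted.
 */
#include <pthread.h>
#include <stdbool.h>

struct worker {
	pthread_mutex_t lock;
	pthread_cond_t wake;
	bool wakeup;			/* analogue of THREAD_WAKEUP */
	bool stop;			/* analogue of kthread_should_stop() */
	void (*run)(struct worker *);
};

static void *worker_loop(void *arg)		/* cf. md_thread() */
{
	struct worker *w = arg;

	pthread_mutex_lock(&w->lock);
	while (!w->stop) {
		while (!w->wakeup && !w->stop)
			pthread_cond_wait(&w->wake, &w->lock);
		w->wakeup = false;		/* clear_bit(THREAD_WAKEUP) */
		pthread_mutex_unlock(&w->lock);
		if (w->run)
			w->run(w);		/* thread->run(thread) */
		pthread_mutex_lock(&w->lock);
	}
	pthread_mutex_unlock(&w->lock);
	return NULL;
}

static void worker_wakeup(struct worker *w)	/* cf. md_wakeup_thread() */
{
	pthread_mutex_lock(&w->lock);
	w->wakeup = true;
	pthread_cond_signal(&w->wake);
	pthread_mutex_unlock(&w->lock);
}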
8000 init_waitqueue_head(&thread->wqueue);
8002 thread->run = run;
8003 thread->mddev = mddev;
8004 thread->timeout = MAX_SCHEDULE_TIMEOUT;
8005 thread->tsk = kthread_run(md_thread, thread,
8007 mdname(thread->mddev),
8009 if (IS_ERR(thread->tsk)) {
8023 * non-existent thread
8034 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
8035 kthread_stop(thread->tsk);
8042 if (!rdev || test_bit(Faulty, &rdev->flags))
8045 if (!mddev->pers || !mddev->pers->error_handler)
8047 mddev->pers->error_handler(mddev,rdev);
8048 if (mddev->degraded)
8049 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8050 sysfs_notify_dirent_safe(rdev->sysfs_state);
8051 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8052 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8053 md_wakeup_thread(mddev->thread);
8054 if (mddev->event_work.func)
8055 queue_work(md_misc_wq, &mddev->event_work);
8073 bdevname(rdev->bdev,b));
8089 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8090 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8091 max_sectors = mddev->resync_max_sectors;
8093 max_sectors = mddev->dev_sectors;
8095 resync = mddev->curr_resync;
8097 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8103 resync -= atomic_read(&mddev->recovery_active);
8106 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8110 if (rdev->raid_disk >= 0 &&
8111 !test_bit(Faulty, &rdev->flags) &&
8112 rdev->recovery_offset != MaxSector &&
8113 rdev->recovery_offset) {
8117 if (mddev->reshape_position != MaxSector)
8123 if (mddev->recovery_cp < MaxSector) {
8150 int i, x = per_milli/50, y = 20-x;
8160 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8162 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8164 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8187 dt = ((jiffies - mddev->resync_mark) / HZ);
8190 curr_mark_cnt = mddev->curr_mark_cnt;
8191 recovery_active = atomic_read(&mddev->recovery_active);
8192 resync_mark_cnt = mddev->resync_mark_cnt;
8195 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8197 rt = max_sectors - resync; /* number of remaining sectors */
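/*
 * [Editor's worked example] With dt = 30 s elapsed since the mark and
 * db = curr_mark_cnt - (recovery_active + resync_mark_cnt)
 *    = 3,072,000 sectors completed in that window, the reported speed
 * is db/2/dt = 51,200 KB/s; with rt = 2,000,000,000 remaining sectors
 * the ETA is roughly rt/(db/dt) = 2e9/102400 ~= 19,531 s (~5.4 h).
 */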
8221 if (!l--)
8227 if (!l--) {
8234 if (!l--)
8252 tmp = mddev->all_mddevs.next;
8286 seq_printf(seq, "[%s] ", pers->name);
8290 seq->poll_event = atomic_read(&md_event_count);
8298 spin_lock(&mddev->lock);
8299 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8301 mddev->pers ? "" : "in");
8302 if (mddev->pers) {
8303 if (mddev->ro==1)
8304 seq_printf(seq, " (read-only)");
8305 if (mddev->ro==2)
8306 seq_printf(seq, " (auto-read-only)");
8307 seq_printf(seq, " %s", mddev->pers->name);
8315 bdevname(rdev->bdev,b), rdev->desc_nr);
8316 if (test_bit(WriteMostly, &rdev->flags))
8318 if (test_bit(Journal, &rdev->flags))
8320 if (test_bit(Faulty, &rdev->flags)) {
8324 if (rdev->raid_disk < 0)
8326 if (test_bit(Replacement, &rdev->flags))
8328 sectors += rdev->sectors;
8332 if (!list_empty(&mddev->disks)) {
8333 if (mddev->pers)
8336 mddev->array_sectors / 2);
8341 if (mddev->persistent) {
8342 if (mddev->major_version != 0 ||
8343 mddev->minor_version != 90) {
8345 mddev->major_version,
8346 mddev->minor_version);
8348 } else if (mddev->external)
8350 mddev->metadata_type);
8352 seq_printf(seq, " super non-persistent");
8354 if (mddev->pers) {
8355 mddev->pers->status(seq, mddev);
8357 if (mddev->pers->sync_request) {
8364 md_bitmap_status(seq, mddev->bitmap);
8368 spin_unlock(&mddev->lock);
8389 seq = file->private_data;
8390 seq->poll_event = atomic_read(&md_event_count);
8397 struct seq_file *seq = filp->private_data;
8407 if (seq->poll_event != atomic_read(&md_event_count))
8423 p->name, p->level);
8425 list_add_tail(&p->list, &pers_list);
8433 pr_debug("md: %s personality unregistered\n", p->name);
8435 list_del_init(&p->list);
8447 ret = -EALREADY;
8470 request_module("md-cluster");
8474 pr_warn("can't find md-cluster module or get it's reference.\n");
8476 return -ENOENT;
8480 ret = md_cluster_ops->join(mddev, nodes);
8482 mddev->safemode_delay = 0;
8490 md_cluster_ops->leave(mddev);
8503 struct gendisk *disk = rdev->bdev->bd_disk;
8504 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8505 atomic_read(&disk->sync_io);
8511 * non-sync IO will cause disk_stat to increase without
8515 * the array to appear non-idle, and resync will slow
8519 * completing might cause the array to appear non-idle
8521 * not have been non-resync activity. This will only
8528 if (init || curr_events - rdev->last_events > 64) {
8529 rdev->last_events = curr_events;
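/*
 * [Editor's sketch] The idle test above in isolation: a member counts
 * as busy only if its I/O event count has moved more than 64 sectors
 * (32 KiB) past the recorded baseline since the last check, which
 * tolerates the small accounting skew between sync and non-sync I/O
 * that the comment above describes. Hypothetical helper name.
 */
static int disk_is_idle(long curr_events, long *last_events, int init)
{
	if (init || curr_events - *last_events > 64) {
		*last_events = curr_events;
		return 0;	/* treated as activity: not idle */
	}
	return 1;
}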
8540 atomic_sub(blocks, &mddev->recovery_active);
8541 wake_up(&mddev->recovery_wait);
8543 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8544 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8545 md_wakeup_thread(mddev->thread);
8565 BUG_ON(mddev->ro == 1);
8566 if (mddev->ro == 2) {
8568 mddev->ro = 0;
8569 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8570 md_wakeup_thread(mddev->thread);
8571 md_wakeup_thread(mddev->sync_thread);
8575 percpu_ref_get(&mddev->writes_pending);
8577 if (mddev->safemode == 1)
8578 mddev->safemode = 0;
8579 /* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8580 if (mddev->in_sync || mddev->sync_checkers) {
8581 spin_lock(&mddev->lock);
8582 if (mddev->in_sync) {
8583 mddev->in_sync = 0;
8584 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8585 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8586 md_wakeup_thread(mddev->thread);
8589 spin_unlock(&mddev->lock);
8593 sysfs_notify_dirent_safe(mddev->sysfs_state);
8594 if (!mddev->has_superblocks)
8596 wait_event(mddev->sb_wait,
8597 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8598 mddev->suspended);
8599 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8600 percpu_ref_put(&mddev->writes_pending);
8619 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8620 percpu_ref_get(&mddev->writes_pending);
8626 percpu_ref_put(&mddev->writes_pending);
8628 if (mddev->safemode == 2)
8629 md_wakeup_thread(mddev->thread);
8630 else if (mddev->safemode_delay)
8632 * every ->safemode_delay jiffies
8634 mod_timer(&mddev->safemode_timer,
8635 roundup(jiffies, mddev->safemode_delay) +
8636 mddev->safemode_delay);
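/*
 * [Editor's example] With safemode_delay = 50 jiffies (200 ms at
 * HZ=250) and jiffies = 1003, roundup(1003, 50) + 50 = 1100: bursts
 * of writes are batched onto one ~200-400 ms timer boundary instead
 * of re-arming the safemode timer on every request.
 */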
8649 if (!mddev->pers)
8651 if (mddev->ro)
8653 if (!mddev->pers->sync_request)
8656 spin_lock(&mddev->lock);
8657 if (mddev->in_sync) {
8658 mddev->in_sync = 0;
8659 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8660 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8661 if (mddev->safemode_delay &&
8662 mddev->safemode == 0)
8663 mddev->safemode = 1;
8664 spin_unlock(&mddev->lock);
8666 sysfs_notify_dirent_safe(mddev->sysfs_state);
8668 wait_event(mddev->sb_wait,
8669 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8671 spin_unlock(&mddev->lock);
8680 struct mddev *mddev = thread->mddev;
8697 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8698 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8700 if (mddev->ro) {/* never try to sync a read-only array */
8701 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8706 ret = md_cluster_ops->resync_start(mddev);
8710 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8711 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8712 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8713 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8714 && ((unsigned long long)mddev->curr_resync_completed
8715 < (unsigned long long)mddev->resync_max_sectors))
8719 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8720 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8721 desc = "data-check";
8723 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8724 desc = "requested-resync";
8728 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8733 mddev->last_sync_action = action ?: desc;
8740 * other == active in resync - this many blocks
8752 int mddev2_minor = -1;
8753 mddev->curr_resync = 2;
8756 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8761 if (!mddev->parallel_resync
8762 && mddev2->curr_resync
8765 if (mddev < mddev2 && mddev->curr_resync == 2) {
8767 mddev->curr_resync = 1;
8770 if (mddev > mddev2 && mddev->curr_resync == 1)
8780 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8781 mddev2->curr_resync >= mddev->curr_resync) {
8782 if (mddev2_minor != mddev2->md_minor) {
8783 mddev2_minor = mddev2->md_minor;
8798 } while (mddev->curr_resync < 2);
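/* Sketch (enum hypothetical): the overloaded curr_resync values the
 * arbitration loop above cycles through; any value >= 4 is the sector
 * count of an active resync ("this many blocks", per the comment above).
 */
enum curr_resync_state {
	RESYNC_NONE    = 0,	/* not engaged in resync at all */
	RESYNC_YIELDED = 1,	/* conflict seen; yielded to the other array */
	RESYNC_DELAYED = 2,	/* still checking for a conflicting resync */
	RESYNC_ACTIVE  = 3,	/* running, no sectors completed yet */
};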
8801 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8805 max_sectors = mddev->resync_max_sectors;
8806 atomic64_set(&mddev->resync_mismatches, 0);
8808 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8809 j = mddev->resync_min;
8810 else if (!mddev->bitmap)
8811 j = mddev->recovery_cp;
8813 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8814 max_sectors = mddev->resync_max_sectors;
8821 mddev->reshape_position != MaxSector)
8822 j = mddev->reshape_position;
8825 max_sectors = mddev->dev_sectors;
8829 if (rdev->raid_disk >= 0 &&
8830 !test_bit(Journal, &rdev->flags) &&
8831 !test_bit(Faulty, &rdev->flags) &&
8832 !test_bit(In_sync, &rdev->flags) &&
8833 rdev->recovery_offset < j)
8834 j = rdev->recovery_offset;
8845 if (mddev->bitmap) {
8846 mddev->pers->quiesce(mddev, 1);
8847 mddev->pers->quiesce(mddev, 0);
8864 mddev->resync_mark = mark[last_mark];
8865 mddev->resync_mark_cnt = mark_cnt[last_mark];
8874 atomic_set(&mddev->recovery_active, 0);
8880 mddev->curr_resync = j;
8882 mddev->curr_resync = 3; /* no longer delayed */
8883 mddev->curr_resync_completed = j;
8884 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8894 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8895 ((mddev->curr_resync > mddev->curr_resync_completed &&
8896 (mddev->curr_resync - mddev->curr_resync_completed)
8899 (j - mddev->curr_resync_completed)*2
8900 >= mddev->resync_max - mddev->curr_resync_completed ||
8901 mddev->curr_resync_completed > mddev->resync_max
8904 wait_event(mddev->recovery_wait,
8905 atomic_read(&mddev->recovery_active) == 0);
8906 mddev->curr_resync_completed = j;
8907 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8908 j > mddev->recovery_cp)
8909 mddev->recovery_cp = j;
8911 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8912 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8915 while (j >= mddev->resync_max &&
8916 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8917 /* As this condition is controlled by user-space,
8922 wait_event_interruptible(mddev->recovery_wait,
8923 mddev->resync_max > j
8925 &mddev->recovery));
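/* Illustrative user-space sketch (array name hypothetical): the wait
 * above parks the resync thread until someone raises sync_max through
 * sysfs, which is how incremental, user-driven scrubbing is done.
 */
#include <stdio.h>

static int bump_sync_max(unsigned long long sectors)
{
	FILE *f = fopen("/sys/block/md0/md/sync_max", "w");

	if (!f)
		return -1;
	fprintf(f, "%llu\n", sectors);
	return fclose(f);
}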
8928 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8931 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8933 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8939 atomic_add(sectors, &mddev->recovery_active);
8942 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8950 mddev->curr_resync = j;
8951 mddev->curr_mark_cnt = io_sectors;
8967 mddev->resync_mark = mark[next];
8968 mddev->resync_mark_cnt = mark_cnt[next];
8970 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8974 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8979 * the 'hard' speed limit, or the system was IO-idle for
8981 * the system might be non-idle CPU-wise, but we only care
8987 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8988 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8989 /((jiffies-mddev->resync_mark)/HZ +1) +1;
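/* Stand-alone model, not in md.c, of the throttle arithmetic above:
 * sectors completed since the last mark, halved to KiB, divided by the
 * elapsed seconds; each "+1" guards against dividing by, or reporting,
 * zero on the first tick.
 */
static unsigned long resync_speed_kibps(unsigned long done_sectors,
					unsigned long mark_sectors,
					unsigned long elapsed_jiffies,
					unsigned long hz)
{
	return (done_sectors - mark_sectors) / 2
		/ (elapsed_jiffies / hz + 1) + 1;
}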
9001 wait_event(mddev->recovery_wait,
9002 !atomic_read(&mddev->recovery_active));
9007 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
9013 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
9015 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9016 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9017 mddev->curr_resync > 3) {
9018 mddev->curr_resync_completed = mddev->curr_resync;
9019 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9021 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9023 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9024 mddev->curr_resync > 3) {
9025 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9026 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9027 if (mddev->curr_resync >= mddev->recovery_cp) {
9031 &mddev->recovery))
9032 mddev->recovery_cp =
9033 mddev->curr_resync_completed;
9035 mddev->recovery_cp =
9036 mddev->curr_resync;
9039 mddev->recovery_cp = MaxSector;
9041 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9042 mddev->curr_resync = MaxSector;
9043 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9044 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9047 if (rdev->raid_disk >= 0 &&
9048 mddev->delta_disks >= 0 &&
9049 !test_bit(Journal, &rdev->flags) &&
9050 !test_bit(Faulty, &rdev->flags) &&
9051 !test_bit(In_sync, &rdev->flags) &&
9052 rdev->recovery_offset < mddev->curr_resync)
9053 rdev->recovery_offset = mddev->curr_resync;
9062 set_mask_bits(&mddev->sb_flags, 0,
9065 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9066 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9067 mddev->delta_disks > 0 &&
9068 mddev->pers->finish_reshape &&
9069 mddev->pers->size &&
9070 mddev->queue) {
9072 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9075 set_capacity(mddev->gendisk, mddev->array_sectors);
9076 revalidate_disk_size(mddev->gendisk, true);
9080 spin_lock(&mddev->lock);
9081 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9083 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9084 mddev->resync_min = 0;
9085 mddev->resync_max = MaxSector;
9086 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9087 mddev->resync_min = mddev->curr_resync_completed;
9088 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9089 mddev->curr_resync = 0;
9090 spin_unlock(&mddev->lock);
9093 md_wakeup_thread(mddev->thread);
9106 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9112 rdev->raid_disk >= 0 &&
9113 !test_bit(Blocked, &rdev->flags) &&
9114 test_bit(Faulty, &rdev->flags) &&
9115 atomic_read(&rdev->nr_pending)==0) {
9116 /* Faulty non-Blocked devices with nr_pending == 0
9122 set_bit(RemoveSynchronized, &rdev->flags);
9130 rdev->raid_disk >= 0 &&
9131 !test_bit(Blocked, &rdev->flags) &&
9132 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9133 (!test_bit(In_sync, &rdev->flags) &&
9134 !test_bit(Journal, &rdev->flags))) &&
9135 atomic_read(&rdev->nr_pending)==0)) {
9136 if (mddev->pers->hot_remove_disk(
9139 rdev->saved_raid_disk = rdev->raid_disk;
9140 rdev->raid_disk = -1;
9144 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9145 clear_bit(RemoveSynchronized, &rdev->flags);
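/* Sketch (helper name hypothetical, kernel types from this file's
 * context): the removal test encoded above.  A device may be handed to
 * ->hot_remove_disk only when nothing can still reach it: it occupies a
 * slot, is not administratively Blocked, has no in-flight IO, and has
 * either been quiesced out (RemoveSynchronized) or is a stale non-journal
 * member.
 */
static bool spare_is_removable(struct md_rdev *rdev)
{
	return rdev->raid_disk >= 0 &&
	       !test_bit(Blocked, &rdev->flags) &&
	       atomic_read(&rdev->nr_pending) == 0 &&
	       (test_bit(RemoveSynchronized, &rdev->flags) ||
		(!test_bit(In_sync, &rdev->flags) &&
		 !test_bit(Journal, &rdev->flags)));
}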
9148 if (removed && mddev->kobj.sd)
9149 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9157 if (test_bit(Candidate, &rdev->flags))
9159 if (rdev->raid_disk >= 0 &&
9160 !test_bit(In_sync, &rdev->flags) &&
9161 !test_bit(Journal, &rdev->flags) &&
9162 !test_bit(Faulty, &rdev->flags))
9164 if (rdev->raid_disk >= 0)
9166 if (test_bit(Faulty, &rdev->flags))
9168 if (!test_bit(Journal, &rdev->flags)) {
9169 if (mddev->ro &&
9170 ! (rdev->saved_raid_disk >= 0 &&
9171 !test_bit(Bitmap_sync, &rdev->flags)))
9174 rdev->recovery_offset = 0;
9176 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9179 if (!test_bit(Journal, &rdev->flags))
9182 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9187 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9195 mddev->sync_thread = md_register_thread(md_do_sync,
9198 if (!mddev->sync_thread) {
9202 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9203 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9204 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9205 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9206 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9209 &mddev->recovery))
9210 if (mddev->sysfs_action)
9211 sysfs_notify_dirent_safe(mddev->sysfs_action);
9213 md_wakeup_thread(mddev->sync_thread);
9214 sysfs_notify_dirent_safe(mddev->sysfs_action);
9219 * This routine is regularly called by all per-raid-array threads to
9220 * deal with generic issues like resync and super-block update.
9227 * "->recovery" and create a thread at ->sync_thread.
9238 * 6/ If array has spares or is not in-sync, start a resync thread.
9242 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9243 /* Write superblock - thread that called mddev_suspend()
9246 set_bit(MD_UPDATING_SB, &mddev->flags);
9248 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9250 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9251 wake_up(&mddev->sb_wait);
9254 if (mddev->suspended)
9257 if (mddev->bitmap)
9261 if (mddev->pers->sync_request && !mddev->external) {
9264 mddev->safemode = 2;
9269 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9272 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9273 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9274 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9275 (mddev->external == 0 && mddev->safemode == 1) ||
9276 (mddev->safemode == 2
9277 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9283 bool try_set_sync = mddev->safemode != 0;
9285 if (!mddev->external && mddev->safemode == 1)
9286 mddev->safemode = 0;
9288 if (mddev->ro) {
9290 if (!mddev->external && mddev->in_sync)
9297 clear_bit(Blocked, &rdev->flags);
9298 /* On a read-only array we can:
9299 * - remove failed devices
9300 * - add already-in_sync devices if the array itself
9301 * is in-sync.
9302 * As we only add devices that are already in-sync,
9307 * ->spare_active and clear saved_raid_disk
9309 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9311 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9312 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9313 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9323 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9324 rdev->raid_disk < 0)
9329 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9330 spin_lock(&mddev->lock);
9332 spin_unlock(&mddev->lock);
9335 if (mddev->sb_flags)
9338 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9339 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9341 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9344 if (mddev->sync_thread) {
9351 mddev->curr_resync_completed = 0;
9352 spin_lock(&mddev->lock);
9353 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9354 spin_unlock(&mddev->lock);
9358 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9359 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9361 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9362 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9367 * Spares are also removed and re-added, to allow
9368 * the personality to fail the re-add.
9371 if (mddev->reshape_position != MaxSector) {
9372 if (mddev->pers->check_reshape == NULL ||
9373 mddev->pers->check_reshape(mddev) != 0)
9376 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9377 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9379 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9380 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9381 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9382 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9383 } else if (mddev->recovery_cp < MaxSector) {
9384 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9385 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9386 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9390 if (mddev->pers->sync_request) {
9396 md_bitmap_write_all(mddev->bitmap);
9398 INIT_WORK(&mddev->del_work, md_start_sync);
9399 queue_work(md_misc_wq, &mddev->del_work);
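/* Sketch (enum hypothetical): the priority order the ladder above gives
 * the new sync_thread's job, highest first, as read from this excerpt.
 */
enum recovery_choice {
	CHOICE_RESHAPE,	/* reshape_position set and check_reshape() agrees */
	CHOICE_RECOVER,	/* spares were added: rebuild onto them */
	CHOICE_RESYNC,	/* recovery_cp < MaxSector: array not clean */
	CHOICE_IDLE,	/* nothing to do; tear the recovery flags back down */
};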
9403 if (!mddev->sync_thread) {
9404 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9407 &mddev->recovery))
9408 if (mddev->sysfs_action)
9409 sysfs_notify_dirent_safe(mddev->sysfs_action);
9412 wake_up(&mddev->sb_wait);
9421 sector_t old_dev_sectors = mddev->dev_sectors;
9425 md_unregister_thread(&mddev->sync_thread);
9426 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9427 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9428 mddev->degraded != mddev->raid_disks) {
9431 if (mddev->pers->spare_active(mddev)) {
9432 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9433 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9436 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9437 mddev->pers->finish_reshape) {
9438 mddev->pers->finish_reshape(mddev);
9443 /* If array is no longer degraded, then any saved_raid_disk
9446 if (!mddev->degraded)
9448 rdev->saved_raid_disk = -1;
9454 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9455 md_cluster_ops->resync_finish(mddev);
9456 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9457 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9458 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9459 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9460 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9461 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9463 * We call md_cluster_ops->update_size here because sync_size could
9468 && !test_bit(MD_CLOSING, &mddev->flags))
9469 md_cluster_ops->update_size(mddev, old_dev_sectors);
9472 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9473 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9474 sysfs_notify_dirent_safe(mddev->sysfs_action);
9476 if (mddev->event_work.func)
9477 queue_work(md_misc_wq, &mddev->event_work);
9483 sysfs_notify_dirent_safe(rdev->sysfs_state);
9484 wait_event_timeout(rdev->blocked_wait,
9485 !test_bit(Blocked, &rdev->flags) &&
9486 !test_bit(BlockedBadBlocks, &rdev->flags),
9498 if (rdev->data_offset > rdev->new_data_offset)
9499 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9501 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9502 rdev->data_offset = rdev->new_data_offset;
9513 struct mddev *mddev = rdev->mddev;
9516 s += rdev->new_data_offset;
9518 s += rdev->data_offset;
9519 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9522 if (test_bit(ExternalBbl, &rdev->flags))
9523 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9524 sysfs_notify_dirent_safe(rdev->sysfs_state);
9525 set_mask_bits(&mddev->sb_flags, 0,
9527 md_wakeup_thread(rdev->mddev->thread);
9539 s += rdev->new_data_offset;
9541 s += rdev->data_offset;
9542 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9543 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9544 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
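/* Sketch (helper name hypothetical, kernel types from this file's
 * context): the sector translation shared by rdev_set_badblocks() and
 * rdev_clear_badblocks() above.  "s" arrives relative to the data area,
 * while the bad-block list is indexed from the start of the device, so
 * the relevant data offset is added back first.
 */
static sector_t rdev_bb_sector(struct md_rdev *rdev, sector_t s, int is_new)
{
	return s + (is_new ? rdev->new_data_offset : rdev->data_offset);
}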
9558 if (mddev->pers)
9560 if (mddev->persistent)
9561 mddev->safemode = 2;
9593 int ret = -ENOMEM;
9639 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9648 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9649 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9651 pr_info("md-cluster: resize failed\n");
9653 md_bitmap_update_sb(mddev->bitmap);
9658 if (test_bit(Faulty, &rdev2->flags))
9662 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9664 if (test_bit(Candidate, &rdev2->flags)) {
9666 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9671 clear_bit(Candidate, &rdev2->flags);
9674 if (role != rdev2->raid_disk) {
9678 if (rdev2->raid_disk == -1 && role != 0xffff &&
9679 !(le32_to_cpu(sb->feature_map) &
9681 rdev2->saved_raid_disk = role;
9684 bdevname(rdev2->bdev,b));
9685 /* wake up mddev->thread here, so array could
9687 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9688 md_wakeup_thread(mddev->thread);
9697 clear_bit(Blocked, &rdev2->flags);
9702 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9703 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9709 * Since mddev->delta_disks has already updated in update_raid_disks,
9712 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9713 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9718 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9719 if (mddev->pers->update_reshape_pos)
9720 mddev->pers->update_reshape_pos(mddev);
9721 if (mddev->pers->start_reshape)
9722 mddev->pers->start_reshape(mddev);
9723 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9724 mddev->reshape_position != MaxSector &&
9725 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9727 mddev->reshape_position = MaxSector;
9728 if (mddev->pers->update_reshape_pos)
9729 mddev->pers->update_reshape_pos(mddev);
9733 mddev->events = le64_to_cpu(sb->events);
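/* For reference: the sb->dev_roles sentinels the role checks above
 * compare against, as defined in include/uapi/linux/raid/md_p.h (values
 * reproduced here as an aid; verify against your tree).
 */
#define MD_DISK_ROLE_SPARE	0xffff	/* unassigned device */
#define MD_DISK_ROLE_FAULTY	0xfffe	/* device kicked from the array */
#define MD_DISK_ROLE_JOURNAL	0xfffd	/* journal device */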
9739 struct page *swapout = rdev->sb_page;
9745 rdev->sb_page = NULL;
9748 ClearPageUptodate(rdev->sb_page);
9749 rdev->sb_loaded = 0;
9750 err = super_types[mddev->major_version].
9751 load_super(rdev, NULL, mddev->minor_version);
9755 __func__, __LINE__, rdev->desc_nr, err);
9756 if (rdev->sb_page)
9757 put_page(rdev->sb_page);
9758 rdev->sb_page = swapout;
9759 rdev->sb_loaded = 1;
9763 sb = page_address(rdev->sb_page);
9768 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9769 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9772 * device In_sync and mddev->degraded
9774 if (rdev->recovery_offset == MaxSector &&
9775 !test_bit(In_sync, &rdev->flags) &&
9776 mddev->pers->spare_active(mddev))
9777 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9790 if (iter->desc_nr == nr) {
9809 if (!test_bit(Faulty, &rdev->flags))
9835 node_detected_dev->dev = dev;
9837 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9859 list_del(&node_detected_dev->list);
9860 dev = node_detected_dev->dev;
9868 if (test_bit(Faulty, &rdev->flags))
9871 set_bit(AutoDetected, &rdev->flags);
9872 list_add(&rdev->same_set, &pending_raid_disks);
9899 * waiting for us in select() or poll() - wake them up
9912 mddev->ctime = 0;
9913 mddev->hold_active = 0;