Lines Matching +full:conf +full:- +full:pu

1 // SPDX-License-Identifier: GPL-2.0-or-later
10 - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar
11 - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com>
12 - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
13 - kerneld support by Boris Tobotras <boris@xtalk.msk.su>
14 - kmod support by: Cyrus Durgin
15 - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>
16 - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>
18 - lots of fixes and improvements to the RAID1/RAID5 and generic
23 - persistent bitmap code
24 Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
44 #include <linux/blk-integrity.h>
65 #include <linux/percpu-refcount.h>
70 #include "md-bitmap.h"
71 #include "md-cluster.h"
103 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
108 * speed limit - in case reconstruction slows down your system despite
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
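
The two getters above implement per-array overrides with a global fallback: a per-array value of 0 means "use the system-wide sysctl limit". A minimal userspace model of the pattern (names illustrative, not the kernel's):

#include <stdio.h>

static int sysctl_min = 1000;            /* stands in for sysctl_speed_limit_min, KB/s */

struct array_cfg {
	int sync_speed_min;              /* 0 means "inherit the global limit" */
};

static int speed_min(const struct array_cfg *cfg)
{
	return cfg->sync_speed_min ? cfg->sync_speed_min : sysctl_min;
}

int main(void)
{
	struct array_cfg dflt = { 0 }, tuned = { 50000 };

	printf("%d %d\n", speed_min(&dflt), speed_min(&tuned));  /* 1000 50000 */
	return 0;
}
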
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
152 if (test_bit(CollisionCheck, &rdev->flags))
158 return -ENOMEM;
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
186 if (ret && !mddev->serial_info_pool)
194 * 1. it is a multi-queue device flagged with writemostly.
195 * 2. the write-behind mode is enabled.
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
215 !test_bit(CollisionCheck, &rdev->flags))
228 if (mddev->serial_info_pool == NULL) {
233 mddev->serial_info_pool =
236 if (!mddev->serial_info_pool) {
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
259 if (mddev->serial_info_pool) {
267 if (!mddev->serialize_policy ||
273 test_bit(CollisionCheck, &temp->flags))
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
315 * a device node in /dev and to open it. This causes races with device-close.
352 * We hold a refcount over the call to ->make_request. By the time that
354 * and so is visible to ->quiesce(), so we don't need the refcount any more.
362 if (mddev->suspend_lo >= mddev->suspend_hi)
364 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
366 if (bio_end_sector(bio) < mddev->suspend_lo)
377 if (bio->bi_opf & REQ_NOWAIT) {
382 prepare_to_wait(&mddev->sb_wait, &__wait,
388 finish_wait(&mddev->sb_wait, &__wait);
390 if (!percpu_ref_tryget_live(&mddev->active_io))
393 if (!mddev->pers->make_request(mddev, bio)) {
394 percpu_ref_put(&mddev->active_io);
398 percpu_ref_put(&mddev->active_io);
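
md_handle_request() above guards every request with the active_io percpu reference: take it while the array is live, wait and retry if the array is suspended, and drop it once the personality has taken the bio. A hedged userspace model of that gate, with percpu_ref replaced by a plain atomic:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int  active_io;
static atomic_bool suspended;

static bool io_tryget_live(void)         /* models percpu_ref_tryget_live() */
{
	if (atomic_load(&suspended))
		return false;
	atomic_fetch_add(&active_io, 1);
	if (atomic_load(&suspended)) {   /* lost a race with suspend */
		atomic_fetch_sub(&active_io, 1);
		return false;
	}
	return true;
}

static void io_put(void)                 /* models percpu_ref_put() */
{
	atomic_fetch_sub(&active_io, 1); /* the suspender waits for this to hit 0 */
}
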
405 struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
407 if (mddev == NULL || mddev->pers == NULL) {
412 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
421 if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
423 bio->bi_status = BLK_STS_IOERR;
429 bio->bi_opf &= ~REQ_NOMERGE;
442 struct md_thread *thread = rcu_dereference_protected(mddev->thread,
443 lockdep_is_held(&mddev->reconfig_mutex));
445 WARN_ON_ONCE(thread && current == thread->tsk);
446 if (mddev->suspended++)
448 wake_up(&mddev->sb_wait);
449 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
450 percpu_ref_kill(&mddev->active_io);
452 if (mddev->pers && mddev->pers->prepare_suspend)
453 mddev->pers->prepare_suspend(mddev);
455 wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
456 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
457 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
460 mddev->noio_flag = memalloc_noio_save();
466 lockdep_assert_held(&mddev->reconfig_mutex);
467 if (--mddev->suspended)
471 memalloc_noio_restore(mddev->noio_flag);
473 percpu_ref_resurrect(&mddev->active_io);
474 wake_up(&mddev->sb_wait);
476 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
477 md_wakeup_thread(mddev->thread);
478 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
488 struct md_rdev *rdev = bio->bi_private;
489 struct mddev *mddev = rdev->mddev;
495 if (atomic_dec_and_test(&mddev->flush_pending))
496 /* The pre-request flush has finished */
497 queue_work(md_wq, &mddev->flush_work);
507 mddev->start_flush = ktime_get_boottime();
508 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
509 atomic_set(&mddev->flush_pending, 1);
512 if (rdev->raid_disk >= 0 &&
513 !test_bit(Faulty, &rdev->flags)) {
516 atomic_inc(&rdev->nr_pending);
518 bi = bio_alloc_bioset(rdev->bdev, 0,
520 GFP_NOIO, &mddev->bio_set);
521 bi->bi_end_io = md_end_flush;
522 bi->bi_private = rdev;
523 atomic_inc(&mddev->flush_pending);
528 if (atomic_dec_and_test(&mddev->flush_pending))
529 queue_work(md_wq, &mddev->flush_work);
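
submit_flushes() above uses the classic "+1 bias" on flush_pending: the count starts at 1 so the completion work cannot be queued while the loop is still issuing per-device flush bios; the final atomic_dec_and_test() drops the bias. A standalone sketch of the same fan-out (illustrative names):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int pending;

static bool complete_one(void)           /* true when the whole batch is done */
{
	return atomic_fetch_sub(&pending, 1) == 1;
}

static void submit_batch(int ndevs)
{
	atomic_store(&pending, 1);       /* the bias holds the batch open */
	for (int i = 0; i < ndevs; i++) {
		atomic_fetch_add(&pending, 1);
		/* issue one flush; its completion calls complete_one() */
	}
	if (complete_one())              /* drop the bias */
		;                        /* every completion already ran */
}
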
535 struct bio *bio = mddev->flush_bio;
543 spin_lock_irq(&mddev->lock);
544 mddev->prev_flush_start = mddev->start_flush;
545 mddev->flush_bio = NULL;
546 spin_unlock_irq(&mddev->lock);
547 wake_up(&mddev->sb_wait);
549 if (bio->bi_iter.bi_size == 0) {
550 /* an empty barrier - all done */
553 bio->bi_opf &= ~REQ_PREFLUSH;
557 * returns error in raid5_make_request() by dm-raid.
562 if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
567 percpu_ref_put(&mddev->active_io);
579 spin_lock_irq(&mddev->lock);
583 wait_event_lock_irq(mddev->sb_wait,
584 !mddev->flush_bio ||
585 ktime_before(req_start, mddev->prev_flush_start),
586 mddev->lock);
588 if (ktime_after(req_start, mddev->prev_flush_start)) {
589 WARN_ON(mddev->flush_bio);
600 WARN_ON(percpu_ref_is_zero(&mddev->active_io));
601 percpu_ref_get(&mddev->active_io);
602 mddev->flush_bio = bio;
605 spin_unlock_irq(&mddev->lock);
608 INIT_WORK(&mddev->flush_work, submit_flushes);
609 queue_work(md_wq, &mddev->flush_work);
612 if (bio->bi_iter.bi_size == 0)
613 /* an empty barrier - all done */
616 bio->bi_opf &= ~REQ_PREFLUSH;
628 if (test_bit(MD_DELETED, &mddev->flags))
630 atomic_inc(&mddev->active);
638 if (mddev->raid_disks || !list_empty(&mddev->disks) ||
639 mddev->ctime || mddev->hold_active)
643 set_bit(MD_DELETED, &mddev->flags);
649 queue_work(md_misc_wq, &mddev->del_work);
654 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
666 mutex_init(&mddev->open_mutex);
667 mutex_init(&mddev->reconfig_mutex);
668 mutex_init(&mddev->sync_mutex);
669 mutex_init(&mddev->bitmap_info.mutex);
670 INIT_LIST_HEAD(&mddev->disks);
671 INIT_LIST_HEAD(&mddev->all_mddevs);
672 INIT_LIST_HEAD(&mddev->deleting);
673 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
674 atomic_set(&mddev->active, 1);
675 atomic_set(&mddev->openers, 0);
676 atomic_set(&mddev->sync_seq, 0);
677 spin_lock_init(&mddev->lock);
678 atomic_set(&mddev->flush_pending, 0);
679 init_waitqueue_head(&mddev->sb_wait);
680 init_waitqueue_head(&mddev->recovery_wait);
681 mddev->reshape_position = MaxSector;
682 mddev->reshape_backwards = 0;
683 mddev->last_sync_action = "none";
684 mddev->resync_min = 0;
685 mddev->resync_max = MaxSector;
686 mddev->level = LEVEL_NONE;
688 INIT_WORK(&mddev->sync_work, md_start_sync);
689 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
698 if (mddev->unit == unit)
731 unit &= ~((1 << MdpMinorShift) - 1);
735 return ERR_PTR(-ENOMEM);
740 error = -EEXIST;
743 new->unit = unit;
745 new->md_minor = MINOR(unit);
747 new->md_minor = MINOR(unit) >> MdpMinorShift;
748 new->hold_active = UNTIL_IOCTL;
750 error = -ENODEV;
751 new->unit = mddev_alloc_unit();
752 if (!new->unit)
754 new->md_minor = MINOR(new->unit);
755 new->hold_active = UNTIL_STOP;
758 list_add(&new->all_mddevs, &all_mddevs);
770 list_del(&mddev->all_mddevs);
784 if (!list_empty(&mddev->deleting))
785 list_splice_init(&mddev->deleting, &delete);
787 if (mddev->to_remove) {
793 * and anything else which might set ->to_remove or may
795 * -EBUSY if sysfs_active is still set.
800 const struct attribute_group *to_remove = mddev->to_remove;
801 mddev->to_remove = NULL;
802 mddev->sysfs_active = 1;
803 mutex_unlock(&mddev->reconfig_mutex);
805 if (mddev->kobj.sd) {
807 sysfs_remove_group(&mddev->kobj, to_remove);
808 if (mddev->pers == NULL ||
809 mddev->pers->sync_request == NULL) {
810 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
811 if (mddev->sysfs_action)
812 sysfs_put(mddev->sysfs_action);
813 if (mddev->sysfs_completed)
814 sysfs_put(mddev->sysfs_completed);
815 if (mddev->sysfs_degraded)
816 sysfs_put(mddev->sysfs_degraded);
817 mddev->sysfs_action = NULL;
818 mddev->sysfs_completed = NULL;
819 mddev->sysfs_degraded = NULL;
822 mddev->sysfs_active = 0;
824 mutex_unlock(&mddev->reconfig_mutex);
826 md_wakeup_thread(mddev->thread);
827 wake_up(&mddev->sb_wait);
830 list_del_init(&rdev->same_set);
831 kobject_del(&rdev->kobj);
842 if (rdev->desc_nr == nr)
854 if (rdev->bdev->bd_dev == dev)
865 if (rdev->bdev->bd_dev == dev)
876 if (level != LEVEL_NONE && pers->level == level)
878 if (strcmp(pers->name, clevel)==0)
887 return MD_NEW_SIZE_SECTORS(bdev_nr_sectors(rdev->bdev));
892 rdev->sb_page = alloc_page(GFP_KERNEL);
893 if (!rdev->sb_page)
894 return -ENOMEM;
900 if (rdev->sb_page) {
901 put_page(rdev->sb_page);
902 rdev->sb_loaded = 0;
903 rdev->sb_page = NULL;
904 rdev->sb_start = 0;
905 rdev->sectors = 0;
907 if (rdev->bb_page) {
908 put_page(rdev->bb_page);
909 rdev->bb_page = NULL;
911 badblocks_exit(&rdev->badblocks);
917 struct md_rdev *rdev = bio->bi_private;
918 struct mddev *mddev = rdev->mddev;
920 if (bio->bi_status) {
922 blk_status_to_errno(bio->bi_status));
924 if (!test_bit(Faulty, &rdev->flags)
925 && (bio->bi_opf & MD_FAILFAST)) {
926 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
927 set_bit(LastDev, &rdev->flags);
930 clear_bit(LastDev, &rdev->flags);
936 if (atomic_dec_and_test(&mddev->pending_writes))
937 wake_up(&mddev->sb_wait);
944 * Increment mddev->pending_writes before returning
954 if (test_bit(Faulty, &rdev->flags))
957 bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
961 GFP_NOIO, &mddev->sync_set);
963 atomic_inc(&rdev->nr_pending);
965 bio->bi_iter.bi_sector = sector;
967 bio->bi_private = rdev;
968 bio->bi_end_io = super_written;
970 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
971 test_bit(FailFast, &rdev->flags) &&
972 !test_bit(LastDev, &rdev->flags))
973 bio->bi_opf |= MD_FAILFAST;
975 atomic_inc(&mddev->pending_writes);
982 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
983 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
984 return -EAGAIN;
994 if (metadata_op && rdev->meta_bdev)
995 bio_init(&bio, rdev->meta_bdev, &bvec, 1, opf);
997 bio_init(&bio, rdev->bdev, &bvec, 1, opf);
1000 bio.bi_iter.bi_sector = sector + rdev->sb_start;
1001 else if (rdev->mddev->reshape_position != MaxSector &&
1002 (rdev->mddev->reshape_backwards ==
1003 (sector >= rdev->mddev->reshape_position)))
1004 bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
1006 bio.bi_iter.bi_sector = sector + rdev->data_offset;
1017 if (rdev->sb_loaded)
1020 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true))
1022 rdev->sb_loaded = 1;
1027 rdev->bdev);
1028 return -EINVAL;
1033 return sb1->set_uuid0 == sb2->set_uuid0 &&
1034 sb1->set_uuid1 == sb2->set_uuid1 &&
1035 sb1->set_uuid2 == sb2->set_uuid2 &&
1036 sb1->set_uuid3 == sb2->set_uuid3;
1058 tmp1->nr_disks = 0;
1059 tmp2->nr_disks = 0;
1081 disk_csum = sb->sb_csum;
1082 sb->sb_csum = 0;
1097 sb->sb_csum = md_csum_fold(disk_csum);
1099 sb->sb_csum = disk_csum;
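
For v0.90, checksums are compared after folding to 16 bits (see the md_csum_fold() comparison in super_90_load() below). A sketch of that fold, assuming it simply adds the two 16-bit halves of the 32-bit sum:

static unsigned int csum_fold16(unsigned int csum)
{
	return (csum & 0xffff) + (csum >> 16);   /* result may use 17 bits */
}
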
1109 * We rely on user-space to write the initial superblock, and support
1116 * 0 - dev has a superblock that is compatible with refdev
1117 * 1 - dev has a superblock that is compatible and newer than refdev
1119 * -EINVAL superblock incompatible or invalid
1120 * -othererror e.g. -EIO
1124 * The first time, mddev->raid_disks will be 0, and data from
1126 * is new enough. Return 0 or -EINVAL
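
A hedged sketch of how a caller consumes that contract when scanning members, conceptually mirroring the freshest-device selection in analyze_sbs(): keep the first device as the reference, and promote any device for which load_super() reports "compatible and newer". The types and the freshness test below are toys:

struct member { long long events; };

/* 1: dev newer than ref, 0: compatible, -1: incompatible (not modeled) */
static int load_super(struct member *dev, struct member *ref)
{
	if (!ref)
		return 1;                        /* first device seen */
	return dev->events > ref->events;        /* toy freshness test */
}

static struct member *pick_freshest(struct member *devs, int n)
{
	struct member *freshest = 0;

	for (int i = 0; i < n; i++)
		if (load_super(&devs[i], freshest) == 1)
			freshest = &devs[i];
	return freshest;
}
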
1155 * support bitmaps. It prints an error message and returns non-zero if mddev
1161 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1164 mdname(mddev), mddev->pers->name);
1184 rdev->sb_start = calc_dev_sboffset(rdev);
1190 ret = -EINVAL;
1192 sb = page_address(rdev->sb_page);
1194 if (sb->md_magic != MD_SB_MAGIC) {
1196 rdev->bdev);
1200 if (sb->major_version != 0 ||
1201 sb->minor_version < 90 ||
1202 sb->minor_version > 91) {
1204 sb->major_version, sb->minor_version, rdev->bdev);
1208 if (sb->raid_disks <= 0)
1211 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1212 pr_warn("md: invalid superblock checksum on %pg\n", rdev->bdev);
1216 rdev->preferred_minor = sb->md_minor;
1217 rdev->data_offset = 0;
1218 rdev->new_data_offset = 0;
1219 rdev->sb_size = MD_SB_BYTES;
1220 rdev->badblocks.shift = -1;
1222 if (sb->level == LEVEL_MULTIPATH)
1223 rdev->desc_nr = -1;
1225 rdev->desc_nr = sb->this_disk.number;
1228 if (sb->level == LEVEL_MULTIPATH ||
1229 (rdev->desc_nr >= 0 &&
1230 rdev->desc_nr < MD_SB_DISKS &&
1231 sb->disks[rdev->desc_nr].state &
1242 mdp_super_t *refsb = page_address(refdev->sb_page);
1245 rdev->bdev, refdev->bdev);
1250 rdev->bdev, refdev->bdev);
1261 rdev->sectors = rdev->sb_start;
1266 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1267 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1269 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1271 ret = -EINVAL;
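
The clamp above exists because the v0.90 format keeps per-device size in a 32-bit KiB field; a worked check of where the cutoff lands (plain arithmetic, not kernel code):

/* (2ULL << 32) sectors = 2^33 sectors; at 512 bytes each that is
 * 2^42 bytes = 4 TiB, the largest size v0.90 can represent. */
_Static_assert((2ULL << 32) * 512 == 1ULL << 42, "v0.90 cap is 4 TiB");
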
1284 mdp_super_t *sb = page_address(rdev->sb_page);
1287 rdev->raid_disk = -1;
1288 clear_bit(Faulty, &rdev->flags);
1289 clear_bit(In_sync, &rdev->flags);
1290 clear_bit(Bitmap_sync, &rdev->flags);
1291 clear_bit(WriteMostly, &rdev->flags);
1293 if (mddev->raid_disks == 0) {
1294 mddev->major_version = 0;
1295 mddev->minor_version = sb->minor_version;
1296 mddev->patch_version = sb->patch_version;
1297 mddev->external = 0;
1298 mddev->chunk_sectors = sb->chunk_size >> 9;
1299 mddev->ctime = sb->ctime;
1300 mddev->utime = sb->utime;
1301 mddev->level = sb->level;
1302 mddev->clevel[0] = 0;
1303 mddev->layout = sb->layout;
1304 mddev->raid_disks = sb->raid_disks;
1305 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1306 mddev->events = ev1;
1307 mddev->bitmap_info.offset = 0;
1308 mddev->bitmap_info.space = 0;
1310 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1311 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1312 mddev->reshape_backwards = 0;
1314 if (mddev->minor_version >= 91) {
1315 mddev->reshape_position = sb->reshape_position;
1316 mddev->delta_disks = sb->delta_disks;
1317 mddev->new_level = sb->new_level;
1318 mddev->new_layout = sb->new_layout;
1319 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1320 if (mddev->delta_disks < 0)
1321 mddev->reshape_backwards = 1;
1323 mddev->reshape_position = MaxSector;
1324 mddev->delta_disks = 0;
1325 mddev->new_level = mddev->level;
1326 mddev->new_layout = mddev->layout;
1327 mddev->new_chunk_sectors = mddev->chunk_sectors;
1329 if (mddev->level == 0)
1330 mddev->layout = -1;
1332 if (sb->state & (1<<MD_SB_CLEAN))
1333 mddev->recovery_cp = MaxSector;
1335 if (sb->events_hi == sb->cp_events_hi &&
1336 sb->events_lo == sb->cp_events_lo) {
1337 mddev->recovery_cp = sb->recovery_cp;
1339 mddev->recovery_cp = 0;
1342 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1343 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1344 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1345 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1347 mddev->max_disks = MD_SB_DISKS;
1349 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1350 mddev->bitmap_info.file == NULL) {
1351 mddev->bitmap_info.offset =
1352 mddev->bitmap_info.default_offset;
1353 mddev->bitmap_info.space =
1354 mddev->bitmap_info.default_space;
1357 } else if (mddev->pers == NULL) {
1361 if (sb->disks[rdev->desc_nr].state & (
1363 if (ev1 < mddev->events)
1364 return -EINVAL;
1365 } else if (mddev->bitmap) {
1369 if (ev1 < mddev->bitmap->events_cleared)
1371 if (ev1 < mddev->events)
1372 set_bit(Bitmap_sync, &rdev->flags);
1374 if (ev1 < mddev->events)
1375 /* just a hot-add of a new device, leave raid_disk at -1 */
1379 if (mddev->level != LEVEL_MULTIPATH) {
1380 desc = sb->disks + rdev->desc_nr;
1382 if (desc->state & (1<<MD_DISK_FAULTY))
1383 set_bit(Faulty, &rdev->flags);
1384 else if (desc->state & (1<<MD_DISK_SYNC) /* &&
1385 desc->raid_disk < mddev->raid_disks */) {
1386 set_bit(In_sync, &rdev->flags);
1387 rdev->raid_disk = desc->raid_disk;
1388 rdev->saved_raid_disk = desc->raid_disk;
1389 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1393 if (mddev->minor_version >= 91) {
1394 rdev->recovery_offset = 0;
1395 rdev->raid_disk = desc->raid_disk;
1398 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1399 set_bit(WriteMostly, &rdev->flags);
1400 if (desc->state & (1<<MD_DISK_FAILFAST))
1401 set_bit(FailFast, &rdev->flags);
1403 set_bit(In_sync, &rdev->flags);
1414 int next_spare = mddev->raid_disks;
1416 /* make rdev->sb match mddev data..
1429 rdev->sb_size = MD_SB_BYTES;
1431 sb = page_address(rdev->sb_page);
1435 sb->md_magic = MD_SB_MAGIC;
1436 sb->major_version = mddev->major_version;
1437 sb->patch_version = mddev->patch_version;
1438 sb->gvalid_words = 0; /* ignored */
1439 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1440 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1441 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1442 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1444 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1445 sb->level = mddev->level;
1446 sb->size = mddev->dev_sectors / 2;
1447 sb->raid_disks = mddev->raid_disks;
1448 sb->md_minor = mddev->md_minor;
1449 sb->not_persistent = 0;
1450 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1451 sb->state = 0;
1452 sb->events_hi = (mddev->events>>32);
1453 sb->events_lo = (u32)mddev->events;
1455 if (mddev->reshape_position == MaxSector)
1456 sb->minor_version = 90;
1458 sb->minor_version = 91;
1459 sb->reshape_position = mddev->reshape_position;
1460 sb->new_level = mddev->new_level;
1461 sb->delta_disks = mddev->delta_disks;
1462 sb->new_layout = mddev->new_layout;
1463 sb->new_chunk = mddev->new_chunk_sectors << 9;
1465 mddev->minor_version = sb->minor_version;
1466 if (mddev->in_sync)
1468 sb->recovery_cp = mddev->recovery_cp;
1469 sb->cp_events_hi = (mddev->events>>32);
1470 sb->cp_events_lo = (u32)mddev->events;
1471 if (mddev->recovery_cp == MaxSector)
1472 sb->state = (1<< MD_SB_CLEAN);
1474 sb->recovery_cp = 0;
1476 sb->layout = mddev->layout;
1477 sb->chunk_size = mddev->chunk_sectors << 9;
1479 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1480 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1482 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1486 int is_active = test_bit(In_sync, &rdev2->flags);
1488 if (rdev2->raid_disk >= 0 &&
1489 sb->minor_version >= 91)
1492 * we can piggy-back on that.
1495 if (rdev2->raid_disk < 0 ||
1496 test_bit(Faulty, &rdev2->flags))
1499 desc_nr = rdev2->raid_disk;
1502 rdev2->desc_nr = desc_nr;
1503 d = &sb->disks[rdev2->desc_nr];
1505 d->number = rdev2->desc_nr;
1506 d->major = MAJOR(rdev2->bdev->bd_dev);
1507 d->minor = MINOR(rdev2->bdev->bd_dev);
1509 d->raid_disk = rdev2->raid_disk;
1511 d->raid_disk = rdev2->desc_nr; /* compatibility */
1512 if (test_bit(Faulty, &rdev2->flags))
1513 d->state = (1<<MD_DISK_FAULTY);
1515 d->state = (1<<MD_DISK_ACTIVE);
1516 if (test_bit(In_sync, &rdev2->flags))
1517 d->state |= (1<<MD_DISK_SYNC);
1521 d->state = 0;
1525 if (test_bit(WriteMostly, &rdev2->flags))
1526 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1527 if (test_bit(FailFast, &rdev2->flags))
1528 d->state |= (1<<MD_DISK_FAILFAST);
1531 for (i=0 ; i < mddev->raid_disks ; i++) {
1532 mdp_disk_t *d = &sb->disks[i];
1533 if (d->state == 0 && d->number == 0) {
1534 d->number = i;
1535 d->raid_disk = i;
1536 d->state = (1<<MD_DISK_REMOVED);
1537 d->state |= (1<<MD_DISK_FAULTY);
1541 sb->nr_disks = nr_disks;
1542 sb->active_disks = active;
1543 sb->working_disks = working;
1544 sb->failed_disks = failed;
1545 sb->spare_disks = spare;
1547 sb->this_disk = sb->disks[rdev->desc_nr];
1548 sb->sb_csum = calc_sb_csum(sb);
1557 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1559 if (rdev->mddev->bitmap_info.offset)
1561 rdev->sb_start = calc_dev_sboffset(rdev);
1562 if (!num_sectors || num_sectors > rdev->sb_start)
1563 num_sectors = rdev->sb_start;
1567 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1568 num_sectors = (sector_t)(2ULL << 32) - 2;
1570 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1571 rdev->sb_page);
1572 } while (md_super_wait(rdev->mddev) < 0);
1579 /* non-zero offset changes not possible with v0.90 */
1592 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1595 disk_csum = sb->sb_csum;
1596 sb->sb_csum = 0;
1598 for (; size >= 4; size -= 4)
1605 sb->sb_csum = disk_csum;
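
calc_sb_1_csum() above walks the superblock as little-endian 32-bit words (with sb_csum zeroed for the duration), accumulates into 64 bits, and folds the high half back in. A self-contained restatement, assuming a little-endian host so the le32 conversion is a no-op:

#include <stdint.h>
#include <stddef.h>

static uint32_t sb1_csum(const uint32_t *words, size_t nwords)
{
	uint64_t sum = 0;

	for (size_t i = 0; i < nwords; i++)
		sum += words[i];
	return (uint32_t)((sum & 0xffffffff) + (sum >> 32));
}
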
1628 sb_start = bdev_nr_sectors(rdev->bdev) - 8 * 2;
1629 sb_start &= ~(sector_t)(4*2-1);
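/* Worked example of the two lines above: the v1.0 superblock sits at least
 * 8 KiB (16 sectors) from the end of the device, rounded down to a 4 KiB
 * (8-sector) boundary. For a 1000005-sector device:
 *   (1000005 - 16) & ~7 = 999989 & ~7 = 999984.
 */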
1638 return -EINVAL;
1640 rdev->sb_start = sb_start;
1648 sb = page_address(rdev->sb_page);
1650 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1651 sb->major_version != cpu_to_le32(1) ||
1652 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1653 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1654 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1655 return -EINVAL;
1657 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1659 rdev->bdev);
1660 return -EINVAL;
1662 if (le64_to_cpu(sb->data_size) < 10) {
1664 rdev->bdev);
1665 return -EINVAL;
1667 if (sb->pad0 ||
1668 sb->pad3[0] ||
1669 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1670 /* Some padding is non-zero, might be a new feature */
1671 return -EINVAL;
1673 rdev->preferred_minor = 0xffff;
1674 rdev->data_offset = le64_to_cpu(sb->data_offset);
1675 rdev->new_data_offset = rdev->data_offset;
1676 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1677 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1678 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1679 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1681 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1682 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1683 if (rdev->sb_size & bmask)
1684 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1687 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1688 return -EINVAL;
1690 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1691 return -EINVAL;
1693 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1694 rdev->desc_nr = -1;
1696 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1698 if (!rdev->bb_page) {
1699 rdev->bb_page = alloc_page(GFP_KERNEL);
1700 if (!rdev->bb_page)
1701 return -ENOMEM;
1703 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1704 rdev->badblocks.count == 0) {
1712 int sectors = le16_to_cpu(sb->bblog_size);
1714 return -EINVAL;
1715 offset = le32_to_cpu(sb->bblog_offset);
1717 return -EINVAL;
1720 rdev->bb_page, REQ_OP_READ, true))
1721 return -EIO;
1722 bbp = (__le64 *)page_address(rdev->bb_page);
1723 rdev->badblocks.shift = sb->bblog_shift;
1724 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1728 sector <<= sb->bblog_shift;
1729 count <<= sb->bblog_shift;
1732 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1733 return -EINVAL;
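/* Hedged decode of one record from the loop above. Assumption, inferred
 * from the shifts used here: each __le64 packs the start sector in the
 * high 54 bits and the length in the low 10 bits, both scaled by
 * bblog_shift on disk.
 */
static inline void bb_decode(unsigned long long bb, unsigned int shift,
			     unsigned long long *sector, unsigned int *count)
{
	*count  = (unsigned int)(bb & 0x3ff) << shift;
	*sector = (bb >> 10) << shift;
}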
1735 } else if (sb->bblog_offset != 0)
1736 rdev->badblocks.shift = 0;
1738 if ((le32_to_cpu(sb->feature_map) &
1740 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1741 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1742 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1745 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1746 sb->level != 0)
1747 return -EINVAL;
1750 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1751 (rdev->desc_nr >= 0 &&
1752 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1753 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1754 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1764 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1766 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1767 sb->level != refsb->level ||
1768 sb->layout != refsb->layout ||
1769 sb->chunksize != refsb->chunksize) {
1771 rdev->bdev,
1772 refdev->bdev);
1773 return -EINVAL;
1775 ev1 = le64_to_cpu(sb->events);
1776 ev2 = le64_to_cpu(refsb->events);
1784 sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset;
1786 sectors = rdev->sb_start;
1787 if (sectors < le64_to_cpu(sb->data_size))
1788 return -EINVAL;
1789 rdev->sectors = le64_to_cpu(sb->data_size);
1795 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1796 __u64 ev1 = le64_to_cpu(sb->events);
1798 rdev->raid_disk = -1;
1799 clear_bit(Faulty, &rdev->flags);
1800 clear_bit(In_sync, &rdev->flags);
1801 clear_bit(Bitmap_sync, &rdev->flags);
1802 clear_bit(WriteMostly, &rdev->flags);
1804 if (mddev->raid_disks == 0) {
1805 mddev->major_version = 1;
1806 mddev->patch_version = 0;
1807 mddev->external = 0;
1808 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1809 mddev->ctime = le64_to_cpu(sb->ctime);
1810 mddev->utime = le64_to_cpu(sb->utime);
1811 mddev->level = le32_to_cpu(sb->level);
1812 mddev->clevel[0] = 0;
1813 mddev->layout = le32_to_cpu(sb->layout);
1814 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1815 mddev->dev_sectors = le64_to_cpu(sb->size);
1816 mddev->events = ev1;
1817 mddev->bitmap_info.offset = 0;
1818 mddev->bitmap_info.space = 0;
1820 * using 3K - total of 4K
1822 mddev->bitmap_info.default_offset = 1024 >> 9;
1823 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1824 mddev->reshape_backwards = 0;
1826 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1827 memcpy(mddev->uuid, sb->set_uuid, 16);
1829 mddev->max_disks = (4096-256)/2;
1831 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1832 mddev->bitmap_info.file == NULL) {
1833 mddev->bitmap_info.offset =
1834 (__s32)le32_to_cpu(sb->bitmap_offset);
1840 if (mddev->minor_version > 0)
1841 mddev->bitmap_info.space = 0;
1842 else if (mddev->bitmap_info.offset > 0)
1843 mddev->bitmap_info.space =
1844 8 - mddev->bitmap_info.offset;
1846 mddev->bitmap_info.space =
1847 -mddev->bitmap_info.offset;
1850 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1851 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1852 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1853 mddev->new_level = le32_to_cpu(sb->new_level);
1854 mddev->new_layout = le32_to_cpu(sb->new_layout);
1855 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1856 if (mddev->delta_disks < 0 ||
1857 (mddev->delta_disks == 0 &&
1858 (le32_to_cpu(sb->feature_map)
1860 mddev->reshape_backwards = 1;
1862 mddev->reshape_position = MaxSector;
1863 mddev->delta_disks = 0;
1864 mddev->new_level = mddev->level;
1865 mddev->new_layout = mddev->layout;
1866 mddev->new_chunk_sectors = mddev->chunk_sectors;
1869 if (mddev->level == 0 &&
1870 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1871 mddev->layout = -1;
1873 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1874 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1876 if (le32_to_cpu(sb->feature_map) &
1878 if (le32_to_cpu(sb->feature_map) &
1880 return -EINVAL;
1881 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1882 (le32_to_cpu(sb->feature_map) &
1884 return -EINVAL;
1885 set_bit(MD_HAS_PPL, &mddev->flags);
1887 } else if (mddev->pers == NULL) {
1893 if (rdev->desc_nr >= 0 &&
1894 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1895 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1896 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1897 if (ev1 + 1 < mddev->events)
1898 return -EINVAL;
1899 } else if (mddev->bitmap) {
1903 if (ev1 < mddev->bitmap->events_cleared)
1905 if (ev1 < mddev->events)
1906 set_bit(Bitmap_sync, &rdev->flags);
1908 if (ev1 < mddev->events)
1909 /* just a hot-add of a new device, leave raid_disk at -1 */
1912 if (mddev->level != LEVEL_MULTIPATH) {
1914 if (rdev->desc_nr < 0 ||
1915 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1917 rdev->desc_nr = -1;
1918 } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
1933 struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
1934 u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
1936 if (rdev->desc_nr >= freshest_max_dev) {
1938 pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
1939 mdname(mddev), rdev->bdev, rdev->desc_nr,
1940 freshest->bdev, freshest_max_dev);
1941 return -EUCLEAN;
1944 role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
1946 mdname(mddev), rdev->bdev, role, role, freshest->bdev);
1948 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1954 set_bit(Faulty, &rdev->flags);
1957 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1960 return -EINVAL;
1962 set_bit(Journal, &rdev->flags);
1963 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1964 rdev->raid_disk = 0;
1967 rdev->saved_raid_disk = role;
1968 if ((le32_to_cpu(sb->feature_map) &
1970 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1971 if (!(le32_to_cpu(sb->feature_map) &
1973 rdev->saved_raid_disk = -1;
1980 &mddev->recovery))
1981 set_bit(In_sync, &rdev->flags);
1983 rdev->raid_disk = role;
1986 if (sb->devflags & WriteMostly1)
1987 set_bit(WriteMostly, &rdev->flags);
1988 if (sb->devflags & FailFast1)
1989 set_bit(FailFast, &rdev->flags);
1990 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1991 set_bit(Replacement, &rdev->flags);
1993 set_bit(In_sync, &rdev->flags);
2003 /* make rdev->sb match mddev and rdev data. */
2005 sb = page_address(rdev->sb_page);
2007 sb->feature_map = 0;
2008 sb->pad0 = 0;
2009 sb->recovery_offset = cpu_to_le64(0);
2010 memset(sb->pad3, 0, sizeof(sb->pad3));
2012 sb->utime = cpu_to_le64((__u64)mddev->utime);
2013 sb->events = cpu_to_le64(mddev->events);
2014 if (mddev->in_sync)
2015 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2016 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2017 sb->resync_offset = cpu_to_le64(MaxSector);
2019 sb->resync_offset = cpu_to_le64(0);
2021 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2023 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2024 sb->size = cpu_to_le64(mddev->dev_sectors);
2025 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2026 sb->level = cpu_to_le32(mddev->level);
2027 sb->layout = cpu_to_le32(mddev->layout);
2028 if (test_bit(FailFast, &rdev->flags))
2029 sb->devflags |= FailFast1;
2031 sb->devflags &= ~FailFast1;
2033 if (test_bit(WriteMostly, &rdev->flags))
2034 sb->devflags |= WriteMostly1;
2036 sb->devflags &= ~WriteMostly1;
2037 sb->data_offset = cpu_to_le64(rdev->data_offset);
2038 sb->data_size = cpu_to_le64(rdev->sectors);
2040 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2041 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2042 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2045 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2046 !test_bit(In_sync, &rdev->flags)) {
2047 sb->feature_map |=
2049 sb->recovery_offset =
2050 cpu_to_le64(rdev->recovery_offset);
2051 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2052 sb->feature_map |=
2056 if (test_bit(Journal, &rdev->flags))
2057 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2058 if (test_bit(Replacement, &rdev->flags))
2059 sb->feature_map |=
2062 if (mddev->reshape_position != MaxSector) {
2063 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2064 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2065 sb->new_layout = cpu_to_le32(mddev->new_layout);
2066 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2067 sb->new_level = cpu_to_le32(mddev->new_level);
2068 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2069 if (mddev->delta_disks == 0 &&
2070 mddev->reshape_backwards)
2071 sb->feature_map
2073 if (rdev->new_data_offset != rdev->data_offset) {
2074 sb->feature_map
2076 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2077 - rdev->data_offset));
2082 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2084 if (rdev->badblocks.count == 0)
2086 else if (sb->bblog_offset == 0)
2090 struct badblocks *bb = &rdev->badblocks;
2091 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2092 u64 *p = bb->page;
2093 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2094 if (bb->changed) {
2098 seq = read_seqbegin(&bb->lock);
2102 for (i = 0 ; i < bb->count ; i++) {
2108 bb->changed = 0;
2109 if (read_seqretry(&bb->lock, seq))
2112 bb->sector = (rdev->sb_start +
2113 (int)le32_to_cpu(sb->bblog_offset));
2114 bb->size = le16_to_cpu(sb->bblog_size);
2120 if (rdev2->desc_nr+1 > max_dev)
2121 max_dev = rdev2->desc_nr+1;
2123 if (max_dev > le32_to_cpu(sb->max_dev)) {
2125 sb->max_dev = cpu_to_le32(max_dev);
2126 rdev->sb_size = max_dev * 2 + 256;
2127 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2128 if (rdev->sb_size & bmask)
2129 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2131 max_dev = le32_to_cpu(sb->max_dev);
2134 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2136 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2137 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2139 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2140 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2141 sb->feature_map |=
2144 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2145 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2146 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2150 i = rdev2->desc_nr;
2151 if (test_bit(Faulty, &rdev2->flags))
2152 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2153 else if (test_bit(In_sync, &rdev2->flags))
2154 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2155 else if (test_bit(Journal, &rdev2->flags))
2156 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2157 else if (rdev2->raid_disk >= 0)
2158 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2160 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2163 sb->sb_csum = calc_sb_1_csum(sb);
2175 else if (dev_size - 64*2 >= 200*1024*1024*2)
2177 else if (dev_size - 4*2 > 8*1024*1024*2)
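
The thresholds above are in 512-byte sectors (2 sectors = 1 KiB), so they appear to select a bitmap reservation tier by device size: one tier above roughly 200 GiB and another above roughly 8 GiB. A quick unit check (plain arithmetic, not kernel code):

_Static_assert(200ULL * 1024 * 1024 * 2 * 512 == 200ULL << 30, "200 GiB");
_Static_assert(  8ULL * 1024 * 1024 * 2 * 512 ==   8ULL << 30, "8 GiB");
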
2189 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2191 if (rdev->data_offset != rdev->new_data_offset)
2193 if (rdev->sb_start < rdev->data_offset) {
2195 max_sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset;
2198 } else if (rdev->mddev->bitmap_info.offset) {
2204 sector_t dev_size = bdev_nr_sectors(rdev->bdev);
2207 sb_start = dev_size - 8*2;
2208 sb_start &= ~(sector_t)(4*2 - 1);
2215 max_sectors = sb_start - bm_space - 4*2;
2219 rdev->sb_start = sb_start;
2221 sb = page_address(rdev->sb_page);
2222 sb->data_size = cpu_to_le64(num_sectors);
2223 sb->super_offset = cpu_to_le64(rdev->sb_start);
2224 sb->sb_csum = calc_sb_1_csum(sb);
2226 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2227 rdev->sb_page);
2228 } while (md_super_wait(rdev->mddev) < 0);
2239 if (new_offset >= rdev->data_offset)
2244 if (rdev->mddev->minor_version == 0)
2251 * beyond write-intent bitmap
2253 if (rdev->sb_start + (32+4)*2 > new_offset)
2255 bitmap = rdev->mddev->bitmap;
2256 if (bitmap && !rdev->mddev->bitmap_info.file &&
2257 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2258 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2260 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2277 .name = "md-1",
2289 if (mddev->sync_super) {
2290 mddev->sync_super(mddev, rdev);
2294 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2296 super_types[mddev->major_version].sync_super(mddev, rdev);
2305 if (test_bit(Faulty, &rdev->flags) ||
2306 test_bit(Journal, &rdev->flags) ||
2307 rdev->raid_disk == -1)
2310 if (test_bit(Faulty, &rdev2->flags) ||
2311 test_bit(Journal, &rdev2->flags) ||
2312 rdev2->raid_disk == -1)
2314 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2337 if (list_empty(&mddev->disks))
2339 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2342 /* skip spares and non-functional disks */
2343 if (test_bit(Faulty, &rdev->flags))
2345 if (rdev->raid_disk < 0)
2353 if (blk_integrity_compare(reference->bdev->bd_disk,
2354 rdev->bdev->bd_disk) < 0)
2355 return -EINVAL;
2357 if (!reference || !bdev_get_integrity(reference->bdev))
2363 blk_integrity_register(mddev->gendisk,
2364 bdev_get_integrity(reference->bdev));
2367 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
2368 (mddev->level != 1 && mddev->level != 10 &&
2369 bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
2372 * because the function is called by md_run() -> pers->run(),
2373 * md_run calls bioset_exit -> bioset_integrity_free in case
2378 return -EINVAL;
2392 if (!mddev->gendisk)
2395 bi_mddev = blk_get_integrity(mddev->gendisk);
2400 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2402 mdname(mddev), rdev->bdev);
2403 return -ENXIO;
2412 return bdev_read_only(rdev->bdev) ||
2413 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2422 if (find_rdev(mddev, rdev->bdev->bd_dev))
2423 return -EEXIST;
2425 if (rdev_read_only(rdev) && mddev->pers)
2426 return -EROFS;
2428 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2429 if (!test_bit(Journal, &rdev->flags) &&
2430 rdev->sectors &&
2431 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2432 if (mddev->pers) {
2434 * If mddev->level <= 0, then we don't care
2437 if (mddev->level > 0)
2438 return -ENOSPC;
2440 mddev->dev_sectors = rdev->sectors;
2443 /* Verify rdev->desc_nr is unique.
2444 * If it is -1, assign a free number, else
2448 if (rdev->desc_nr < 0) {
2450 if (mddev->pers)
2451 choice = mddev->raid_disks;
2454 rdev->desc_nr = choice;
2456 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2458 return -EBUSY;
2462 if (!test_bit(Journal, &rdev->flags) &&
2463 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2465 mdname(mddev), mddev->max_disks);
2466 return -EBUSY;
2468 snprintf(b, sizeof(b), "%pg", rdev->bdev);
2471 rdev->mddev = mddev;
2474 if (mddev->raid_disks)
2477 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2481 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2482 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2483 rdev->sysfs_unack_badblocks =
2484 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2485 rdev->sysfs_badblocks =
2486 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2488 list_add_rcu(&rdev->same_set, &mddev->disks);
2489 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2492 mddev->recovery_disabled++;
2497 pr_warn("md: failed to register dev-%s for %s\n",
2510 pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
2513 if (test_bit(AutoDetected, &rdev->flags))
2514 md_autodetect_dev(rdev->bdev->bd_dev);
2516 blkdev_put(rdev->bdev,
2517 test_bit(Holder, &rdev->flags) ? rdev : &claim_rdev);
2518 rdev->bdev = NULL;
2519 kobject_put(&rdev->kobj);
2524 struct mddev *mddev = rdev->mddev;
2526 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2527 list_del_rcu(&rdev->same_set);
2528 pr_debug("md: unbind<%pg>\n", rdev->bdev);
2529 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2530 rdev->mddev = NULL;
2531 sysfs_remove_link(&rdev->kobj, "block");
2532 sysfs_put(rdev->sysfs_state);
2533 sysfs_put(rdev->sysfs_unack_badblocks);
2534 sysfs_put(rdev->sysfs_badblocks);
2535 rdev->sysfs_state = NULL;
2536 rdev->sysfs_unack_badblocks = NULL;
2537 rdev->sysfs_badblocks = NULL;
2538 rdev->badblocks.count = 0;
2547 list_add(&rdev->same_set, &mddev->deleting);
2554 while (!list_empty(&mddev->disks)) {
2555 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2559 mddev->raid_disks = 0;
2560 mddev->major_version = 0;
2565 lockdep_assert_held(&mddev->lock);
2566 if (!mddev->in_sync) {
2567 mddev->sync_checkers++;
2568 spin_unlock(&mddev->lock);
2569 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2570 spin_lock(&mddev->lock);
2571 if (!mddev->in_sync &&
2572 percpu_ref_is_zero(&mddev->writes_pending)) {
2573 mddev->in_sync = 1;
2575 * Ensure ->in_sync is visible before we clear
2576 * ->sync_checkers.
2579 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2580 sysfs_notify_dirent_safe(mddev->sysfs_state);
2582 if (--mddev->sync_checkers == 0)
2583 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2585 if (mddev->safemode == 1)
2586 mddev->safemode = 0;
2587 return mddev->in_sync;
2592 /* Update each superblock (in-memory image), but
2600 if (rdev->sb_events == mddev->events ||
2602 rdev->raid_disk < 0 &&
2603 rdev->sb_events+1 == mddev->events)) {
2605 rdev->sb_loaded = 2;
2608 rdev->sb_loaded = 1;
2621 if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
2630 sb = page_address(rdev->sb_page);
2633 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2635 if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
2636 !test_bit(Faulty, &rdev->flags))
2639 if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
2644 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2645 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2646 (mddev->layout != le32_to_cpu(sb->layout)) ||
2647 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2648 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2660 int ret = -1;
2664 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2670 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2672 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2674 ret = md_cluster_ops->metadata_update_start(mddev);
2678 md_cluster_ops->metadata_update_cancel(mddev);
2679 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2689 * During reshape/resync it might use array-addresses rather
2693 if (rdev->raid_disk >= 0 &&
2694 mddev->delta_disks >= 0 &&
2695 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2696 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2697 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2698 !test_bit(Journal, &rdev->flags) &&
2699 !test_bit(In_sync, &rdev->flags) &&
2700 mddev->curr_resync_completed > rdev->recovery_offset)
2701 rdev->recovery_offset = mddev->curr_resync_completed;
2704 if (!mddev->persistent) {
2705 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2706 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2707 if (!mddev->external) {
2708 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2710 if (rdev->badblocks.changed) {
2711 rdev->badblocks.changed = 0;
2712 ack_all_badblocks(&rdev->badblocks);
2715 clear_bit(Blocked, &rdev->flags);
2716 clear_bit(BlockedBadBlocks, &rdev->flags);
2717 wake_up(&rdev->blocked_wait);
2720 wake_up(&mddev->sb_wait);
2724 spin_lock(&mddev->lock);
2726 mddev->utime = ktime_get_real_seconds();
2728 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2730 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2731 /* just a clean <-> dirty transition, possibly leave spares alone,
2738 if (mddev->degraded)
2742 * might have an event_count that still looks up-to-date,
2743 * so it can be re-added without a resync.
2750 sync_req = mddev->in_sync;
2752 /* If this is just a dirty<->clean transition, and the array is clean
2755 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2756 && mddev->can_decrease_events
2757 && mddev->events != 1) {
2758 mddev->events--;
2759 mddev->can_decrease_events = 0;
2762 mddev->events++;
2763 mddev->can_decrease_events = nospares;
2767 * This 64-bit counter should never wrap.
2771 WARN_ON(mddev->events == 0);
2774 if (rdev->badblocks.changed)
2776 if (test_bit(Faulty, &rdev->flags))
2777 set_bit(FaultRecorded, &rdev->flags);
2781 spin_unlock(&mddev->lock);
2784 mdname(mddev), mddev->in_sync);
2786 if (mddev->queue)
2787 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2789 md_bitmap_update_sb(mddev->bitmap);
2791 if (rdev->sb_loaded != 1)
2794 if (!test_bit(Faulty, &rdev->flags)) {
2796 rdev->sb_start, rdev->sb_size,
2797 rdev->sb_page);
2799 rdev->bdev,
2800 (unsigned long long)rdev->sb_start);
2801 rdev->sb_events = mddev->events;
2802 if (rdev->badblocks.size) {
2804 rdev->badblocks.sector,
2805 rdev->badblocks.size << 9,
2806 rdev->bb_page);
2807 rdev->badblocks.size = 0;
2812 rdev->bdev);
2814 if (mddev->level == LEVEL_MULTIPATH)
2820 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2823 md_cluster_ops->metadata_update_finish(mddev);
2825 if (mddev->in_sync != sync_req ||
2826 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2830 wake_up(&mddev->sb_wait);
2831 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2832 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2835 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2836 clear_bit(Blocked, &rdev->flags);
2839 ack_all_badblocks(&rdev->badblocks);
2840 clear_bit(BlockedBadBlocks, &rdev->flags);
2841 wake_up(&rdev->blocked_wait);
2848 struct mddev *mddev = rdev->mddev;
2850 bool add_journal = test_bit(Journal, &rdev->flags);
2852 if (!mddev->pers->hot_remove_disk || add_journal) {
2857 super_types[mddev->major_version].
2861 err = mddev->pers->hot_add_disk(mddev, rdev);
2869 sysfs_notify_dirent_safe(rdev->sysfs_state);
2871 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2872 if (mddev->degraded)
2873 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2874 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2876 md_wakeup_thread(mddev->thread);
2911 unsigned long flags = READ_ONCE(rdev->flags);
2915 rdev->badblocks.unacked_exist))
2924 (rdev->badblocks.unacked_exist
2943 len -= strlen(sep);
2952 * faulty - simulates an error
2953 * remove - disconnects the device
2954 * writemostly - sets write_mostly
2955 * -writemostly - clears write_mostly
2956 * blocked - sets the Blocked flags
2957 * -blocked - clears the Blocked and possibly simulates an error
2958 * insync - sets Insync providing device isn't active
2959 * -insync - clear Insync for a device with a slot assigned,
2961 * write_error - sets WriteErrorSeen
2962 * -write_error - clears WriteErrorSeen
2963 * {,-}failfast - set/clear FailFast
2966 struct mddev *mddev = rdev->mddev;
2967 int err = -EINVAL;
2970 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2971 md_error(rdev->mddev, rdev);
2973 if (test_bit(MD_BROKEN, &rdev->mddev->flags))
2974 err = -EBUSY;
2978 if (rdev->mddev->pers) {
2979 clear_bit(Blocked, &rdev->flags);
2980 remove_and_add_spares(rdev->mddev, rdev);
2982 if (rdev->raid_disk >= 0)
2983 err = -EBUSY;
2987 err = md_cluster_ops->remove_disk(mddev, rdev);
2991 if (mddev->pers) {
2992 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2993 md_wakeup_thread(mddev->thread);
2999 set_bit(WriteMostly, &rdev->flags);
3000 mddev_create_serial_pool(rdev->mddev, rdev, false);
3003 } else if (cmd_match(buf, "-writemostly")) {
3004 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3005 clear_bit(WriteMostly, &rdev->flags);
3009 set_bit(Blocked, &rdev->flags);
3011 } else if (cmd_match(buf, "-blocked")) {
3012 if (!test_bit(Faulty, &rdev->flags) &&
3013 !test_bit(ExternalBbl, &rdev->flags) &&
3014 rdev->badblocks.unacked_exist) {
3018 md_error(rdev->mddev, rdev);
3020 clear_bit(Blocked, &rdev->flags);
3021 clear_bit(BlockedBadBlocks, &rdev->flags);
3022 wake_up(&rdev->blocked_wait);
3023 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3024 md_wakeup_thread(rdev->mddev->thread);
3027 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3028 set_bit(In_sync, &rdev->flags);
3031 set_bit(FailFast, &rdev->flags);
3034 } else if (cmd_match(buf, "-failfast")) {
3035 clear_bit(FailFast, &rdev->flags);
3038 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3039 !test_bit(Journal, &rdev->flags)) {
3040 if (rdev->mddev->pers == NULL) {
3041 clear_bit(In_sync, &rdev->flags);
3042 rdev->saved_raid_disk = rdev->raid_disk;
3043 rdev->raid_disk = -1;
3047 set_bit(WriteErrorSeen, &rdev->flags);
3049 } else if (cmd_match(buf, "-write_error")) {
3050 clear_bit(WriteErrorSeen, &rdev->flags);
3053 /* Any non-spare device that is not a replacement can
3057 if (rdev->raid_disk >= 0 &&
3058 !test_bit(Journal, &rdev->flags) &&
3059 !test_bit(Replacement, &rdev->flags))
3060 set_bit(WantReplacement, &rdev->flags);
3061 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3062 md_wakeup_thread(rdev->mddev->thread);
3064 } else if (cmd_match(buf, "-want_replacement")) {
3069 clear_bit(WantReplacement, &rdev->flags);
3075 if (rdev->mddev->pers)
3076 err = -EBUSY;
3078 set_bit(Replacement, &rdev->flags);
3081 } else if (cmd_match(buf, "-replacement")) {
3083 if (rdev->mddev->pers)
3084 err = -EBUSY;
3086 clear_bit(Replacement, &rdev->flags);
3089 } else if (cmd_match(buf, "re-add")) {
3090 if (!rdev->mddev->pers)
3091 err = -EINVAL;
3092 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3093 rdev->saved_raid_disk >= 0) {
3100 if (!mddev_is_clustered(rdev->mddev) ||
3101 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3102 clear_bit(Faulty, &rdev->flags);
3106 err = -EBUSY;
3107 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3108 set_bit(ExternalBbl, &rdev->flags);
3109 rdev->badblocks.shift = 0;
3111 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3112 clear_bit(ExternalBbl, &rdev->flags);
3118 sysfs_notify_dirent_safe(rdev->sysfs_state);
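
A hedged userspace example of driving the keywords listed in the comment above state_store(): write one of them into a member device's sysfs state file. The path below is illustrative and depends on your array and member names:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical layout: array md0, member device sdb1 */
	int fd = open("/sys/block/md0/md/dev-sdb1/state", O_WRONLY);

	if (fd < 0)
		return 1;
	(void)write(fd, "writemostly", 11);   /* sets the WriteMostly flag */
	close(fd);
	return 0;
}
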
3127 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3139 atomic_set(&rdev->corrected_errors, n);
3148 if (test_bit(Journal, &rdev->flags))
3150 else if (rdev->raid_disk < 0)
3153 return sprintf(page, "%d\n", rdev->raid_disk);
3162 if (test_bit(Journal, &rdev->flags))
3163 return -EBUSY;
3165 slot = -1;
3172 return -ENOSPC;
3174 if (rdev->mddev->pers && slot == -1) {
3177 * with the personality with ->hot_*_disk.
3182 if (rdev->raid_disk == -1)
3183 return -EEXIST;
3185 if (rdev->mddev->pers->hot_remove_disk == NULL)
3186 return -EINVAL;
3187 clear_bit(Blocked, &rdev->flags);
3188 remove_and_add_spares(rdev->mddev, rdev);
3189 if (rdev->raid_disk >= 0)
3190 return -EBUSY;
3191 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3192 md_wakeup_thread(rdev->mddev->thread);
3193 } else if (rdev->mddev->pers) {
3199 if (rdev->raid_disk != -1)
3200 return -EBUSY;
3202 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3203 return -EBUSY;
3205 if (rdev->mddev->pers->hot_add_disk == NULL)
3206 return -EINVAL;
3208 if (slot >= rdev->mddev->raid_disks &&
3209 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3210 return -ENOSPC;
3212 rdev->raid_disk = slot;
3213 if (test_bit(In_sync, &rdev->flags))
3214 rdev->saved_raid_disk = slot;
3216 rdev->saved_raid_disk = -1;
3217 clear_bit(In_sync, &rdev->flags);
3218 clear_bit(Bitmap_sync, &rdev->flags);
3219 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3221 rdev->raid_disk = -1;
3224 sysfs_notify_dirent_safe(rdev->sysfs_state);
3226 sysfs_link_rdev(rdev->mddev, rdev);
3229 if (slot >= rdev->mddev->raid_disks &&
3230 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3231 return -ENOSPC;
3232 rdev->raid_disk = slot;
3234 clear_bit(Faulty, &rdev->flags);
3235 clear_bit(WriteMostly, &rdev->flags);
3236 set_bit(In_sync, &rdev->flags);
3237 sysfs_notify_dirent_safe(rdev->sysfs_state);
3248 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3256 return -EINVAL;
3257 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3258 return -EBUSY;
3259 if (rdev->sectors && rdev->mddev->external)
3262 return -EBUSY;
3263 rdev->data_offset = offset;
3264 rdev->new_data_offset = offset;
3274 (unsigned long long)rdev->new_data_offset);
3281 struct mddev *mddev = rdev->mddev;
3284 return -EINVAL;
3286 if (mddev->sync_thread ||
3287 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3288 return -EBUSY;
3289 if (new_offset == rdev->data_offset)
3292 else if (new_offset > rdev->data_offset) {
3294 if (new_offset - rdev->data_offset
3295 + mddev->dev_sectors > rdev->sectors)
3296 return -E2BIG;
3303 if (new_offset < rdev->data_offset &&
3304 mddev->reshape_backwards)
3305 return -EINVAL;
3310 if (new_offset > rdev->data_offset &&
3311 !mddev->reshape_backwards)
3312 return -EINVAL;
3314 if (mddev->pers && mddev->persistent &&
3315 !super_types[mddev->major_version]
3317 return -E2BIG;
3318 rdev->new_data_offset = new_offset;
3319 if (new_offset > rdev->data_offset)
3320 mddev->reshape_backwards = 1;
3321 else if (new_offset < rdev->data_offset)
3322 mddev->reshape_backwards = 0;
3332 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3338 if (a->data_offset + a->sectors <= b->data_offset)
3340 if (b->data_offset + b->sectors <= a->data_offset)
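
The two early returns above are the standard disjointness test for half-open ranges; equivalently, as a standalone predicate (illustrative names):

#include <stdint.h>
#include <stdbool.h>

static bool ranges_overlap(uint64_t a_start, uint64_t a_len,
			   uint64_t b_start, uint64_t b_len)
{
	/* overlap iff neither range ends at or before the other begins */
	return !(a_start + a_len <= b_start || b_start + b_len <= a_start);
}
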
3352 if (test_bit(MD_DELETED, &mddev->flags))
3355 if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
3372 return -EINVAL;
3374 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3375 return -EINVAL; /* sector conversion overflow */
3379 return -EINVAL; /* unsigned long long to sector_t overflow */
3388 struct mddev *my_mddev = rdev->mddev;
3389 sector_t oldsectors = rdev->sectors;
3392 if (test_bit(Journal, &rdev->flags))
3393 return -EBUSY;
3395 return -EINVAL;
3396 if (rdev->data_offset != rdev->new_data_offset)
3397 return -EINVAL; /* too confusing */
3398 if (my_mddev->pers && rdev->raid_disk >= 0) {
3399 if (my_mddev->persistent) {
3400 sectors = super_types[my_mddev->major_version].
3403 return -EBUSY;
3405 sectors = bdev_nr_sectors(rdev->bdev) -
3406 rdev->data_offset;
3407 if (!my_mddev->pers->resize)
3409 return -EINVAL;
3411 if (sectors < my_mddev->dev_sectors)
3412 return -EINVAL; /* component must fit device */
3414 rdev->sectors = sectors;
3421 if (sectors > oldsectors && my_mddev->external &&
3428 rdev->sectors = oldsectors;
3429 return -EBUSY;
3439 unsigned long long recovery_start = rdev->recovery_offset;
3441 if (test_bit(In_sync, &rdev->flags) ||
3455 return -EINVAL;
3457 if (rdev->mddev->pers &&
3458 rdev->raid_disk >= 0)
3459 return -EBUSY;
3461 rdev->recovery_offset = recovery_start;
3463 set_bit(In_sync, &rdev->flags);
3465 clear_bit(In_sync, &rdev->flags);
3472 /* sysfs access to bad-blocks list.
3474 * 'bad-blocks' lists sector numbers and lengths of ranges that
3476 * the one-page limit of sysfs.
3479 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
3485 return badblocks_show(&rdev->badblocks, page, 0);
3489 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3491 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3492 wake_up(&rdev->blocked_wait);
3500 return badblocks_show(&rdev->badblocks, page, 1);
3504 return badblocks_store(&rdev->badblocks, page, len, 1);
3512 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3521 return -EINVAL;
3523 return -EINVAL;
3525 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3526 rdev->raid_disk >= 0)
3527 return -EBUSY;
3529 if (rdev->mddev->persistent) {
3530 if (rdev->mddev->major_version == 0)
3531 return -EINVAL;
3532 if ((sector > rdev->sb_start &&
3533 sector - rdev->sb_start > S16_MAX) ||
3534 (sector < rdev->sb_start &&
3535 rdev->sb_start - sector > -S16_MIN))
3536 return -EINVAL;
3537 rdev->ppl.offset = sector - rdev->sb_start;
3538 } else if (!rdev->mddev->external) {
3539 return -EBUSY;
3541 rdev->ppl.sector = sector;
3551 return sprintf(page, "%u\n", rdev->ppl.size);
3560 return -EINVAL;
3562 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3563 rdev->raid_disk >= 0)
3564 return -EBUSY;
3566 if (rdev->mddev->persistent) {
3567 if (rdev->mddev->major_version == 0)
3568 return -EINVAL;
3570 return -EINVAL;
3571 } else if (!rdev->mddev->external) {
3572 return -EBUSY;
3574 rdev->ppl.size = size;
3602 if (!entry->show)
3603 return -EIO;
3604 if (!rdev->mddev)
3605 return -ENODEV;
3606 return entry->show(rdev, page);
3617 struct mddev *mddev = rdev->mddev;
3619 if (!entry->store)
3620 return -EIO;
3622 return -EACCES;
3624 if (entry->store == state_store && cmd_match(page, "remove"))
3627 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3629 if (rdev->mddev == NULL)
3630 rv = -ENODEV;
3632 rv = entry->store(rdev, page, length);
3659 rdev->desc_nr = -1;
3660 rdev->saved_raid_disk = -1;
3661 rdev->raid_disk = -1;
3662 rdev->flags = 0;
3663 rdev->data_offset = 0;
3664 rdev->new_data_offset = 0;
3665 rdev->sb_events = 0;
3666 rdev->last_read_error = 0;
3667 rdev->sb_loaded = 0;
3668 rdev->bb_page = NULL;
3669 atomic_set(&rdev->nr_pending, 0);
3670 atomic_set(&rdev->read_errors, 0);
3671 atomic_set(&rdev->corrected_errors, 0);
3673 INIT_LIST_HEAD(&rdev->same_set);
3674 init_waitqueue_head(&rdev->blocked_wait);
3678 * be used - I wonder if that matters
3680 return badblocks_init(&rdev->badblocks, 0);
3689 * - the device is nonexistent (zero size)
3690 * - the device has no valid superblock
3692 * a faulty rdev _never_ has rdev->sb set.
3703 return ERR_PTR(-ENOMEM);
3712 if (super_format == -2) {
3716 set_bit(Holder, &rdev->flags);
3719 rdev->bdev = blkdev_get_by_dev(newdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
3721 if (IS_ERR(rdev->bdev)) {
3722 pr_warn("md: could not open device unknown-block(%u,%u).\n",
3724 err = PTR_ERR(rdev->bdev);
3728 kobject_init(&rdev->kobj, &rdev_ktype);
3730 size = bdev_nr_bytes(rdev->bdev) >> BLOCK_SIZE_BITS;
3733 rdev->bdev);
3734 err = -EINVAL;
3741 if (err == -EINVAL) {
3743 rdev->bdev,
3749 rdev->bdev);
3757 blkdev_put(rdev->bdev, holder);
3776 switch (super_types[mddev->major_version].
3777 load_super(rdev, freshest, mddev->minor_version)) {
3784 pr_warn("md: fatal superblock inconsistency in %pg -- removing from array\n",
3785 rdev->bdev);
3792 return -EINVAL;
3795 super_types[mddev->major_version].
3800 if (mddev->max_disks &&
3801 (rdev->desc_nr >= mddev->max_disks ||
3802 i > mddev->max_disks)) {
3804 mdname(mddev), rdev->bdev,
3805 mddev->max_disks);
3810 if (super_types[mddev->major_version].
3812 pr_warn("md: kicking non-fresh %pg from array!\n",
3813 rdev->bdev);
3818 if (mddev->level == LEVEL_MULTIPATH) {
3819 rdev->desc_nr = i++;
3820 rdev->raid_disk = rdev->desc_nr;
3821 set_bit(In_sync, &rdev->flags);
3822 } else if (rdev->raid_disk >=
3823 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3824 !test_bit(Journal, &rdev->flags)) {
3825 rdev->raid_disk = -1;
3826 clear_bit(In_sync, &rdev->flags);
3833 /* Read a fixed-point number.
3841 * all without any floating-point arithmetic.
3846 long decimals = -1;
3852 value = *cp - '0';
3862 return -EINVAL;
3865 *res = result * int_pow(10, scale - decimals);
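/* Worked example (illustrative): parsing "1.25" with scale == 3
 * leaves result == 125 and decimals == 2, so
 *   *res = 125 * 10^(3 - 2) = 1250,
 * i.e. 1.25 seconds becomes 1250 milliseconds using integer
 * arithmetic only.
 */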
3872 unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
3883 return -EINVAL;
3887 return -EINVAL;
3889 mddev->safemode_delay = 0;
3891 unsigned long old_delay = mddev->safemode_delay;
3896 mddev->safemode_delay = new_delay;
3898 mod_timer(&mddev->safemode_timer, jiffies+1);
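/* Usage sketch, assuming the standard md sysfs layout:
 *   echo 0.200 > /sys/block/md0/md/safe_mode_delay
 * is parsed with scale 3 into msec == 200 and stored as jiffies;
 * writing 0 disables safemode entirely, and shortening the delay
 * re-arms the timer immediately (jiffies + 1) as above.
 */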
3910 spin_lock(&mddev->lock);
3911 p = mddev->pers;
3913 ret = sprintf(page, "%s\n", p->name);
3914 else if (mddev->clevel[0])
3915 ret = sprintf(page, "%s\n", mddev->clevel);
3916 else if (mddev->level != LEVEL_NONE)
3917 ret = sprintf(page, "%d\n", mddev->level);
3920 spin_unlock(&mddev->lock);
3936 return -EINVAL;
3942 if (mddev->pers == NULL) {
3943 strncpy(mddev->clevel, buf, slen);
3944 if (mddev->clevel[slen-1] == '\n')
3945 slen--;
3946 mddev->clevel[slen] = 0;
3947 mddev->level = LEVEL_NONE;
3951 rv = -EROFS;
3956 * - array is not engaged in resync/recovery/reshape
3957 * - old personality can be suspended
3958 * - new personality will access other array.
3961 rv = -EBUSY;
3962 if (mddev->sync_thread ||
3963 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3964 mddev->reshape_position != MaxSector ||
3965 mddev->sysfs_active)
3968 rv = -EINVAL;
3969 if (!mddev->pers->quiesce) {
3971 mdname(mddev), mddev->pers->name);
3977 if (clevel[slen-1] == '\n')
3978 slen--;
3983 if (request_module("md-%s", clevel) != 0)
3984 request_module("md-level-%s", clevel);
3987 if (!pers || !try_module_get(pers->owner)) {
3990 rv = -EINVAL;
3995 if (pers == mddev->pers) {
3997 module_put(pers->owner);
4001 if (!pers->takeover) {
4002 module_put(pers->owner);
4005 rv = -EINVAL;
4010 rdev->new_raid_disk = rdev->raid_disk;
4012 /* ->takeover must set new_* and/or delta_disks
4015 priv = pers->takeover(mddev);
4017 mddev->new_level = mddev->level;
4018 mddev->new_layout = mddev->layout;
4019 mddev->new_chunk_sectors = mddev->chunk_sectors;
4020 mddev->raid_disks -= mddev->delta_disks;
4021 mddev->delta_disks = 0;
4022 mddev->reshape_backwards = 0;
4023 module_put(pers->owner);
4034 spin_lock(&mddev->lock);
4035 oldpers = mddev->pers;
4036 oldpriv = mddev->private;
4037 mddev->pers = pers;
4038 mddev->private = priv;
4039 strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4040 mddev->level = mddev->new_level;
4041 mddev->layout = mddev->new_layout;
4042 mddev->chunk_sectors = mddev->new_chunk_sectors;
4043 mddev->delta_disks = 0;
4044 mddev->reshape_backwards = 0;
4045 mddev->degraded = 0;
4046 spin_unlock(&mddev->lock);
4048 if (oldpers->sync_request == NULL &&
4049 mddev->external) {
4050 /* We are converting from a no-redundancy array
4054 * clean->dirty
4057 mddev->in_sync = 0;
4058 mddev->safemode_delay = 0;
4059 mddev->safemode = 0;
4062 oldpers->free(mddev, oldpriv);
4064 if (oldpers->sync_request == NULL &&
4065 pers->sync_request != NULL) {
4067 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4070 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4071 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4072 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4074 if (oldpers->sync_request != NULL &&
4075 pers->sync_request == NULL) {
4077 if (mddev->to_remove == NULL)
4078 mddev->to_remove = &md_redundancy_group;
4081 module_put(oldpers->owner);
4084 if (rdev->raid_disk < 0)
4086 if (rdev->new_raid_disk >= mddev->raid_disks)
4087 rdev->new_raid_disk = -1;
4088 if (rdev->new_raid_disk == rdev->raid_disk)
4093 if (rdev->raid_disk < 0)
4095 if (rdev->new_raid_disk == rdev->raid_disk)
4097 rdev->raid_disk = rdev->new_raid_disk;
4098 if (rdev->raid_disk < 0)
4099 clear_bit(In_sync, &rdev->flags);
4103 rdev->raid_disk, mdname(mddev));
4107 if (pers->sync_request == NULL) {
4111 mddev->in_sync = 1;
4112 del_timer_sync(&mddev->safemode_timer);
4114 blk_set_stacking_limits(&mddev->queue->limits);
4115 pers->run(mddev);
4116 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4118 if (!mddev->thread)
4120 sysfs_notify_dirent_safe(mddev->sysfs_level);
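/* Usage sketch, assuming the standard md sysfs layout: a running
 * array can be converted in place when the new personality provides
 * ->takeover, e.g.
 *   echo raid6 > /sys/block/md0/md/level
 * The store path above resolves the module through its "md-raid6" /
 * "md-level-6" aliases and swaps mddev->pers under mddev->lock.
 */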
4135 if (mddev->reshape_position != MaxSector &&
4136 mddev->layout != mddev->new_layout)
4138 mddev->new_layout, mddev->layout);
4139 return sprintf(page, "%d\n", mddev->layout);
4155 if (mddev->pers) {
4156 if (mddev->pers->check_reshape == NULL)
4157 err = -EBUSY;
4159 err = -EROFS;
4161 mddev->new_layout = n;
4162 err = mddev->pers->check_reshape(mddev);
4164 mddev->new_layout = mddev->layout;
4167 mddev->new_layout = n;
4168 if (mddev->reshape_position == MaxSector)
4169 mddev->layout = n;
4180 if (mddev->raid_disks == 0)
4182 if (mddev->reshape_position != MaxSector &&
4183 mddev->delta_disks != 0)
4184 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4185 mddev->raid_disks - mddev->delta_disks);
4186 return sprintf(page, "%d\n", mddev->raid_disks);
4204 if (mddev->pers)
4206 else if (mddev->reshape_position != MaxSector) {
4208 int olddisks = mddev->raid_disks - mddev->delta_disks;
4210 err = -EINVAL;
4213 rdev->data_offset < rdev->new_data_offset)
4216 rdev->data_offset > rdev->new_data_offset)
4220 mddev->delta_disks = n - olddisks;
4221 mddev->raid_disks = n;
4222 mddev->reshape_backwards = (mddev->delta_disks < 0);
4224 mddev->raid_disks = n;
4235 return sprintf(page, "%pU\n", mddev->uuid);
4243 if (mddev->reshape_position != MaxSector &&
4244 mddev->chunk_sectors != mddev->new_chunk_sectors)
4246 mddev->new_chunk_sectors << 9,
4247 mddev->chunk_sectors << 9);
4248 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4264 if (mddev->pers) {
4265 if (mddev->pers->check_reshape == NULL)
4266 err = -EBUSY;
4268 err = -EROFS;
4270 mddev->new_chunk_sectors = n >> 9;
4271 err = mddev->pers->check_reshape(mddev);
4273 mddev->new_chunk_sectors = mddev->chunk_sectors;
4276 mddev->new_chunk_sectors = n >> 9;
4277 if (mddev->reshape_position == MaxSector)
4278 mddev->chunk_sectors = n >> 9;
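/* Annotation: 'chunk_size' is presented in bytes but kept in
 * 512-byte sectors, hence the "n >> 9" / "<< 9" conversions; e.g.
 *   echo 524288 > /sys/block/md0/md/chunk_size
 * records chunk_sectors == 1024 (a 512KiB chunk). During a reshape
 * the show method reports both the new and the old value.
 */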
4289 if (mddev->recovery_cp == MaxSector)
4291 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4307 return -EINVAL;
4313 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4314 err = -EBUSY;
4317 mddev->recovery_cp = n;
4318 if (mddev->pers)
4319 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4344 * read-auto
4347 * clean - no pending writes, but otherwise active.
4351 * if not known, block and switch to write-pending
4357 * write-pending
4360 * active-idle
4364 * Array is failed. It's useful because mounted-arrays aren't stopped
4371 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4372 "write-pending", "active-idle", "broken", NULL };
4388 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4389 switch(mddev->ro) {
4397 spin_lock(&mddev->lock);
4398 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4400 else if (mddev->in_sync)
4402 else if (mddev->safemode)
4406 spin_unlock(&mddev->lock);
4409 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4412 if (list_empty(&mddev->disks) &&
4413 mddev->raid_disks == 0 &&
4414 mddev->dev_sectors == 0)
4432 if (mddev->pers && (st == active || st == clean) &&
4433 mddev->ro != MD_RDONLY) {
4437 spin_lock(&mddev->lock);
4440 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4441 md_wakeup_thread(mddev->thread);
4442 wake_up(&mddev->sb_wait);
4446 err = -EBUSY;
4449 sysfs_notify_dirent_safe(mddev->sysfs_state);
4450 spin_unlock(&mddev->lock);
4456 err = -EINVAL;
4466 if (mddev->pers)
4474 if (mddev->pers)
4477 mddev->ro = MD_RDONLY;
4478 set_disk_ro(mddev->gendisk, 1);
4483 if (mddev->pers) {
4486 else if (mddev->ro == MD_RDONLY)
4489 mddev->ro = MD_AUTO_READ;
4490 set_disk_ro(mddev->gendisk, 0);
4493 mddev->ro = MD_AUTO_READ;
4498 if (mddev->pers) {
4502 spin_lock(&mddev->lock);
4504 err = -EBUSY;
4505 spin_unlock(&mddev->lock);
4507 err = -EINVAL;
4510 if (mddev->pers) {
4514 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4515 wake_up(&mddev->sb_wait);
4518 mddev->ro = MD_RDWR;
4519 set_disk_ro(mddev->gendisk, 0);
4531 if (mddev->hold_active == UNTIL_IOCTL)
4532 mddev->hold_active = 0;
4533 sysfs_notify_dirent_safe(mddev->sysfs_state);
4544 atomic_read(&mddev->max_corr_read_errors));
4557 return -EINVAL;
4558 atomic_set(&mddev->max_corr_read_errors, n);
4569 return -EINVAL;
4590 return -EINVAL;
4593 return -EINVAL;
4597 return -EOVERFLOW;
4602 if (mddev->persistent) {
4603 rdev = md_import_device(dev, mddev->major_version,
4604 mddev->minor_version);
4605 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4607 = list_entry(mddev->disks.next,
4609 err = super_types[mddev->major_version]
4610 .load_super(rdev, rdev0, mddev->minor_version);
4614 } else if (mddev->external)
4615 rdev = md_import_device(dev, -2, -1);
4617 rdev = md_import_device(dev, -1, -1);
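/* Usage sketch, assuming the standard md sysfs layout: 'new_dev'
 * takes a "major:minor" pair, e.g.
 *   echo 8:32 > /sys/block/md0/md/new_dev
 * to attach /dev/sdc; the md_import_device() variant chosen above
 * depends on whether metadata is versioned, external or absent.
 */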
4646 if (!mddev->bitmap)
4648 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
4652 if (*end == '-') { /* range */
4658 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4661 md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
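/* Usage sketch, assuming the standard md sysfs layout: this store
 * method backs 'bitmap_set_bits'; chunks may be listed singly or as
 * ranges, e.g.
 *   echo 100-200 > /sys/block/md0/md/bitmap_set_bits
 * marks bitmap chunks 100..200 dirty so the next resync rewrites
 * the corresponding regions.
 */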
4674 (unsigned long long)mddev->dev_sectors / 2);
4684 * If array is active, we can try an on-line resize
4694 if (mddev->pers) {
4699 if (mddev->dev_sectors == 0 ||
4700 mddev->dev_sectors > sectors)
4701 mddev->dev_sectors = sectors;
4703 err = -ENOSPC;
4721 if (mddev->persistent)
4723 mddev->major_version, mddev->minor_version);
4724 else if (mddev->external)
4725 return sprintf(page, "external:%s\n", mddev->metadata_type);
4744 err = -EBUSY;
4745 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4747 else if (!list_empty(&mddev->disks))
4752 mddev->persistent = 0;
4753 mddev->external = 0;
4754 mddev->major_version = 0;
4755 mddev->minor_version = 90;
4759 size_t namelen = len-9;
4760 if (namelen >= sizeof(mddev->metadata_type))
4761 namelen = sizeof(mddev->metadata_type)-1;
4762 strncpy(mddev->metadata_type, buf+9, namelen);
4763 mddev->metadata_type[namelen] = 0;
4764 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4765 mddev->metadata_type[--namelen] = 0;
4766 mddev->persistent = 0;
4767 mddev->external = 1;
4768 mddev->major_version = 0;
4769 mddev->minor_version = 90;
4773 err = -EINVAL;
4780 err = -ENOENT;
4783 mddev->major_version = major;
4784 mddev->minor_version = minor;
4785 mddev->persistent = 1;
4786 mddev->external = 0;
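/* Usage sketch, assuming the standard md sysfs layout: before the
 * array is assembled, 'metadata_version' accepts "none", a native
 * "major.minor" pair such as 1.2, or an "external:<name>" tag, e.g.
 *   echo external:imsm > /sys/block/md0/md/metadata_version
 * which is why the branches above set persistent/external and the
 * version numbers in each combination.
 */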
4800 unsigned long recovery = mddev->recovery;
4816 else if (mddev->reshape_position != MaxSector)
4824 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4834 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4839 if (work_pending(&mddev->sync_work))
4842 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4847 md_wakeup_thread_directly(mddev->sync_thread);
4854 int sync_seq = atomic_read(&mddev->sync_seq);
4856 mutex_lock(&mddev->sync_mutex);
4857 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4860 wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
4861 !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
4863 mutex_unlock(&mddev->sync_mutex);
4868 mutex_lock(&mddev->sync_mutex);
4869 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4872 wait_event(resync_wait, mddev->sync_thread == NULL &&
4873 !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
4875 mutex_unlock(&mddev->sync_mutex);
4881 if (!mddev->pers || !mddev->pers->sync_request)
4882 return -EINVAL;
4889 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4890 return -EBUSY;
4892 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4894 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4895 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4898 if (mddev->pers->start_reshape == NULL)
4899 return -EINVAL;
4902 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4903 err = -EBUSY;
4904 } else if (mddev->reshape_position == MaxSector ||
4905 mddev->pers->check_reshape == NULL ||
4906 mddev->pers->check_reshape(mddev)) {
4907 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4908 err = mddev->pers->start_reshape(mddev);
4916 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4922 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4925 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4927 return -EINVAL;
4928 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4929 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4930 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4932 if (mddev->ro == MD_AUTO_READ) {
4934 * canceling read-auto mode
4936 mddev->ro = MD_RDWR;
4937 md_wakeup_thread(mddev->sync_thread);
4939 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4940 md_wakeup_thread(mddev->thread);
4941 sysfs_notify_dirent_safe(mddev->sysfs_action);
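/* Usage sketch, assuming the standard md sysfs layout: 'sync_action'
 * accepts "idle", "frozen", "resync", "recover", "check", "repair"
 * and "reshape", e.g.
 *   echo check > /sys/block/md0/md/sync_action
 * sets MD_RECOVERY_CHECK | REQUESTED | SYNC above and wakes the md
 * thread, while "frozen" parks any running sync thread first.
 */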
4951 return sprintf(page, "%s\n", mddev->last_sync_action);
4961 atomic64_read(&mddev->resync_mismatches));
4970 mddev->sync_speed_min ? "local": "system");
4986 return -EINVAL;
4988 mddev->sync_speed_min = min;
4999 mddev->sync_speed_max ? "local": "system");
5015 return -EINVAL;
5017 mddev->sync_speed_max = max;
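/* Annotation: a stored value of 0 (shown with a "(system)" suffix)
 * falls back to the global dev.raid.speed_limit_min /
 * speed_limit_max sysctls; any other value, e.g.
 *   echo 50000 > /sys/block/md0/md/sync_speed_max
 * caps this array's resync at 50000 KiB/s regardless of the
 * system-wide limits.
 */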
5027 return sprintf(page, "%d\n", mddev->degraded);
5034 return sprintf(page, "%d\n", mddev->parallel_resync);
5043 return -EINVAL;
5046 return -EINVAL;
5048 mddev->parallel_resync = n;
5050 if (mddev->sync_thread)
5065 if (mddev->curr_resync == MD_RESYNC_NONE)
5067 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5068 dt = (jiffies - mddev->resync_mark) / HZ;
5070 db = resync - mddev->resync_mark_cnt;
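/* Annotation: both marks are sector counts, so db sectors moved in
 * dt seconds yields the reported rate of db / dt / 2 KiB/s; e.g.
 * 409600 sectors in 10 seconds reads back as 20480.
 */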
5081 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5084 if (mddev->curr_resync == MD_RESYNC_YIELDED ||
5085 mddev->curr_resync == MD_RESYNC_DELAYED)
5088 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5089 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5090 max_sectors = mddev->resync_max_sectors;
5092 max_sectors = mddev->dev_sectors;
5094 resync = mddev->curr_resync_completed;
5105 (unsigned long long)mddev->resync_min);
5114 return -EINVAL;
5116 spin_lock(&mddev->lock);
5117 err = -EINVAL;
5118 if (min > mddev->resync_max)
5121 err = -EBUSY;
5122 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5126 mddev->resync_min = round_down(min, 8);
5130 spin_unlock(&mddev->lock);
5140 if (mddev->resync_max == MaxSector)
5144 (unsigned long long)mddev->resync_max);
5150 spin_lock(&mddev->lock);
5152 mddev->resync_max = MaxSector;
5157 err = -EINVAL;
5160 if (max < mddev->resync_min)
5163 err = -EBUSY;
5164 if (max < mddev->resync_max && md_is_rdwr(mddev) &&
5165 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5169 chunk = mddev->chunk_sectors;
5173 err = -EINVAL;
5177 mddev->resync_max = max;
5179 wake_up(&mddev->recovery_wait);
5182 spin_unlock(&mddev->lock);
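/* Annotation: writing "max" resets resync_max to MaxSector; a
 * numeric 'sync_max' must be >= 'sync_min', a multiple of the chunk
 * size when one is set (with 512KiB chunks only multiples of 1024
 * sectors pass the check above), and cannot be lowered while a
 * resync is running (-EBUSY).
 */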
5192 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5205 return -EINVAL;
5210 err = -EINVAL;
5211 if (mddev->pers == NULL ||
5212 mddev->pers->quiesce == NULL)
5215 mddev->suspend_lo = new;
5229 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5242 return -EINVAL;
5247 err = -EINVAL;
5248 if (mddev->pers == NULL)
5252 mddev->suspend_hi = new;
5266 if (mddev->reshape_position != MaxSector)
5268 (unsigned long long)mddev->reshape_position);
5284 return -EINVAL;
5288 err = -EBUSY;
5289 if (mddev->pers)
5291 mddev->reshape_position = new;
5292 mddev->delta_disks = 0;
5293 mddev->reshape_backwards = 0;
5294 mddev->new_level = mddev->level;
5295 mddev->new_layout = mddev->layout;
5296 mddev->new_chunk_sectors = mddev->chunk_sectors;
5298 rdev->new_data_offset = rdev->data_offset;
5313 mddev->reshape_backwards ? "backwards" : "forwards");
5327 return -EINVAL;
5328 if (mddev->reshape_backwards == backwards)
5335 if (mddev->delta_disks)
5336 err = -EBUSY;
5337 else if (mddev->persistent &&
5338 mddev->major_version == 0)
5339 err = -EINVAL;
5341 mddev->reshape_backwards = backwards;
5353 if (mddev->external_size)
5355 (unsigned long long)mddev->array_sectors/2);
5373 return -EINVAL;
5377 if (mddev->pers)
5378 sectors = mddev->pers->size(mddev, 0, 0);
5380 sectors = mddev->array_sectors;
5382 mddev->external_size = 0;
5385 err = -EINVAL;
5386 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5387 err = -E2BIG;
5389 mddev->external_size = 1;
5393 mddev->array_sectors = sectors;
5394 if (mddev->pers)
5395 set_capacity_and_notify(mddev->gendisk,
5396 mddev->array_sectors);
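/* Usage sketch, assuming the standard md sysfs layout: writing
 * "default" restores the personality's computed size, while a
 * number of KiB, e.g.
 *   echo 1048576 > /sys/block/md0/md/array_size
 * pins an external size; values beyond what the personality can
 * provide are rejected with -E2BIG.
 */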
5411 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5413 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5415 } else if (mddev->bitmap) {
5417 } else if (mddev->pers) {
5418 if (mddev->pers->sync_request)
5434 if (mddev->pers) {
5435 if (mddev->pers->change_consistency_policy)
5436 err = mddev->pers->change_consistency_policy(mddev, buf);
5438 err = -EBUSY;
5439 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5440 set_bit(MD_HAS_PPL, &mddev->flags);
5442 err = -EINVAL;
5454 return sprintf(page, "%d\n", mddev->fail_last_dev);
5471 if (value != mddev->fail_last_dev)
5472 mddev->fail_last_dev = value;
5482 if (mddev->pers == NULL || (mddev->pers->level != 1))
5485 return sprintf(page, "%d\n", mddev->serialize_policy);
5502 if (value == mddev->serialize_policy)
5508 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5510 err = -EINVAL;
5519 mddev->serialize_policy = value;
5592 if (!entry->show)
5593 return -EIO;
5597 return -EBUSY;
5601 rv = entry->show(mddev, page);
5614 if (!entry->store)
5615 return -EIO;
5617 return -EACCES;
5621 return -EBUSY;
5624 rv = entry->store(mddev, page, length);
5633 if (mddev->sysfs_state)
5634 sysfs_put(mddev->sysfs_state);
5635 if (mddev->sysfs_level)
5636 sysfs_put(mddev->sysfs_level);
5638 del_gendisk(mddev->gendisk);
5639 put_disk(mddev->gendisk);
5658 kobject_put(&mddev->kobj);
5665 if (mddev->writes_pending.percpu_count_ptr)
5667 if (percpu_ref_init(&mddev->writes_pending, no_op,
5669 return -ENOMEM;
5671 percpu_ref_put(&mddev->writes_pending);
5681 * If dev is non-zero it must be a device number with a MAJOR of
5708 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5710 unit = MINOR(mddev->unit) >> shift;
5719 if (mddev2->gendisk &&
5720 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5722 error = -EEXIST;
5731 mddev->hold_active = UNTIL_STOP;
5733 error = -ENOMEM;
5738 disk->major = MAJOR(mddev->unit);
5739 disk->first_minor = unit << shift;
5740 disk->minors = 1 << shift;
5742 strcpy(disk->disk_name, name);
5744 sprintf(disk->disk_name, "md_d%d", unit);
5746 sprintf(disk->disk_name, "md%d", unit);
5747 disk->fops = &md_fops;
5748 disk->private_data = mddev;
5750 mddev->queue = disk->queue;
5751 blk_set_stacking_limits(&mddev->queue->limits);
5752 blk_queue_write_cache(mddev->queue, true, true);
5753 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5754 mddev->gendisk = disk;
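/* Annotation: non-partitionable arrays are named "md%d" while
 * mdp-style (partitionable) ones become "md_d%d" and reserve
 * 1 << MdpMinorShift minors for partitions, which is what the
 * shift/unit arithmetic above encodes.
 */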
5759 kobject_init(&mddev->kobj, &md_ktype);
5760 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5767 mddev->hold_active = 0;
5773 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5774 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5775 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5819 while (len && val[len-1] == '\n')
5820 len--;
5822 return -E2BIG;
5832 return -EINVAL;
5839 mddev->safemode = 1;
5840 if (mddev->external)
5841 sysfs_notify_dirent_safe(mddev->sysfs_state);
5843 md_wakeup_thread(mddev->thread);
5851 wake_up(&mddev->sb_wait);
5861 if (list_empty(&mddev->disks))
5863 return -EINVAL;
5865 if (mddev->pers)
5866 return -EBUSY;
5868 if (mddev->sysfs_active)
5869 return -EBUSY;
5874 if (!mddev->raid_disks) {
5875 if (!mddev->persistent)
5876 return -EINVAL;
5879 return -EINVAL;
5882 if (mddev->level != LEVEL_NONE)
5883 request_module("md-level-%d", mddev->level);
5884 else if (mddev->clevel[0])
5885 request_module("md-%s", mddev->clevel);
5892 mddev->has_superblocks = false;
5894 if (test_bit(Faulty, &rdev->flags))
5896 sync_blockdev(rdev->bdev);
5897 invalidate_bdev(rdev->bdev);
5898 if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
5899 mddev->ro = MD_RDONLY;
5900 if (mddev->gendisk)
5901 set_disk_ro(mddev->gendisk, 1);
5904 if (rdev->sb_page)
5905 mddev->has_superblocks = true;
5911 if (rdev->meta_bdev) {
5913 } else if (rdev->data_offset < rdev->sb_start) {
5914 if (mddev->dev_sectors &&
5915 rdev->data_offset + mddev->dev_sectors
5916 > rdev->sb_start) {
5919 return -EINVAL;
5922 if (rdev->sb_start + rdev->sb_size/512
5923 > rdev->data_offset) {
5926 return -EINVAL;
5929 sysfs_notify_dirent_safe(rdev->sysfs_state);
5930 nowait = nowait && bdev_nowait(rdev->bdev);
5933 err = percpu_ref_init(&mddev->active_io, active_io_release,
5938 if (!bioset_initialized(&mddev->bio_set)) {
5939 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5943 if (!bioset_initialized(&mddev->sync_set)) {
5944 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5949 if (!bioset_initialized(&mddev->io_clone_set)) {
5950 err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
5957 pers = find_pers(mddev->level, mddev->clevel);
5958 if (!pers || !try_module_get(pers->owner)) {
5960 if (mddev->level != LEVEL_NONE)
5962 mddev->level);
5965 mddev->clevel);
5966 err = -EINVAL;
5970 if (mddev->level != pers->level) {
5971 mddev->level = pers->level;
5972 mddev->new_level = pers->level;
5974 strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5976 if (mddev->reshape_position != MaxSector &&
5977 pers->start_reshape == NULL) {
5979 module_put(pers->owner);
5980 err = -EINVAL;
5984 if (pers->sync_request) {
5994 rdev->bdev->bd_disk ==
5995 rdev2->bdev->bd_disk) {
5998 rdev->bdev,
5999 rdev2->bdev);
6005 pr_warn("True protection against single-disk failure might be compromised.\n");
6008 mddev->recovery = 0;
6009 /* may be overridden by personality */
6010 mddev->resync_max_sectors = mddev->dev_sectors;
6012 mddev->ok_start_degraded = start_dirty_degraded;
6015 mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
6017 err = pers->run(mddev);
6019 pr_warn("md: pers->run() failed ...\n");
6020 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
6021 WARN_ONCE(!mddev->external_size,
6025 (unsigned long long)mddev->array_sectors / 2,
6026 (unsigned long long)pers->size(mddev, 0, 0) / 2);
6027 err = -EINVAL;
6029 if (err == 0 && pers->sync_request &&
6030 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
6033 bitmap = md_bitmap_create(mddev, -1);
6039 mddev->bitmap = bitmap;
6045 if (mddev->bitmap_info.max_write_behind > 0) {
6049 if (test_bit(WriteMostly, &rdev->flags) &&
6053 if (create_pool && mddev->serial_info_pool == NULL) {
6054 mddev->serial_info_pool =
6057 if (!mddev->serial_info_pool) {
6058 err = -ENOMEM;
6064 if (mddev->queue) {
6068 if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
6073 if (mddev->degraded)
6076 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6078 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6079 blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
6083 blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
6085 if (pers->sync_request) {
6086 if (mddev->kobj.sd &&
6087 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6090 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6091 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6092 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6093 } else if (mddev->ro == MD_AUTO_READ)
6094 mddev->ro = MD_RDWR;
6096 atomic_set(&mddev->max_corr_read_errors,
6098 mddev->safemode = 0;
6100 mddev->safemode_delay = 0;
6102 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6103 mddev->in_sync = 1;
6105 spin_lock(&mddev->lock);
6106 mddev->pers = pers;
6107 spin_unlock(&mddev->lock);
6109 if (rdev->raid_disk >= 0)
6112 if (mddev->degraded && md_is_rdwr(mddev))
6114 * via sysfs - until a lack of spares is confirmed.
6116 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6117 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6119 if (mddev->sb_flags)
6127 if (mddev->private)
6128 pers->free(mddev, mddev->private);
6129 mddev->private = NULL;
6130 module_put(pers->owner);
6133 bioset_exit(&mddev->io_clone_set);
6135 bioset_exit(&mddev->sync_set);
6137 bioset_exit(&mddev->bio_set);
6139 percpu_ref_exit(&mddev->active_io);
6148 set_bit(MD_NOT_READY, &mddev->flags);
6164 md_wakeup_thread(mddev->thread);
6165 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
6167 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6168 clear_bit(MD_NOT_READY, &mddev->flags);
6169 mddev->changed = 1;
6170 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6171 sysfs_notify_dirent_safe(mddev->sysfs_state);
6172 sysfs_notify_dirent_safe(mddev->sysfs_action);
6173 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6175 clear_bit(MD_NOT_READY, &mddev->flags);
6183 if (mddev->pers->start) {
6184 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6185 md_wakeup_thread(mddev->thread);
6186 ret = mddev->pers->start(mddev);
6187 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6188 md_wakeup_thread(mddev->sync_thread);
6196 struct gendisk *disk = mddev->gendisk;
6202 if (list_empty(&mddev->disks))
6203 return -ENXIO;
6204 if (!mddev->pers)
6205 return -EINVAL;
6207 return -EBUSY;
6211 if (test_bit(Journal, &rdev->flags) &&
6212 !test_bit(Faulty, &rdev->flags))
6218 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6220 return -EINVAL;
6222 return -EROFS;
6224 mddev->safemode = 0;
6225 mddev->ro = MD_RDWR;
6227 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6229 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6230 md_wakeup_thread(mddev->thread);
6231 md_wakeup_thread(mddev->sync_thread);
6232 sysfs_notify_dirent_safe(mddev->sysfs_state);
6238 mddev->array_sectors = 0;
6239 mddev->external_size = 0;
6240 mddev->dev_sectors = 0;
6241 mddev->raid_disks = 0;
6242 mddev->recovery_cp = 0;
6243 mddev->resync_min = 0;
6244 mddev->resync_max = MaxSector;
6245 mddev->reshape_position = MaxSector;
6246 /* we still need mddev->external in export_rdev, do not clear it yet */
6247 mddev->persistent = 0;
6248 mddev->level = LEVEL_NONE;
6249 mddev->clevel[0] = 0;
6255 if (mddev->hold_active)
6256 mddev->flags = 0;
6258 mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
6259 mddev->sb_flags = 0;
6260 mddev->ro = MD_RDWR;
6261 mddev->metadata_type[0] = 0;
6262 mddev->chunk_sectors = 0;
6263 mddev->ctime = mddev->utime = 0;
6264 mddev->layout = 0;
6265 mddev->max_disks = 0;
6266 mddev->events = 0;
6267 mddev->can_decrease_events = 0;
6268 mddev->delta_disks = 0;
6269 mddev->reshape_backwards = 0;
6270 mddev->new_level = LEVEL_NONE;
6271 mddev->new_layout = 0;
6272 mddev->new_chunk_sectors = 0;
6273 mddev->curr_resync = MD_RESYNC_NONE;
6274 atomic64_set(&mddev->resync_mismatches, 0);
6275 mddev->suspend_lo = mddev->suspend_hi = 0;
6276 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6277 mddev->recovery = 0;
6278 mddev->in_sync = 0;
6279 mddev->changed = 0;
6280 mddev->degraded = 0;
6281 mddev->safemode = 0;
6282 mddev->private = NULL;
6283 mddev->cluster_info = NULL;
6284 mddev->bitmap_info.offset = 0;
6285 mddev->bitmap_info.default_offset = 0;
6286 mddev->bitmap_info.default_space = 0;
6287 mddev->bitmap_info.chunksize = 0;
6288 mddev->bitmap_info.daemon_sleep = 0;
6289 mddev->bitmap_info.max_write_behind = 0;
6290 mddev->bitmap_info.nodes = 0;
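/* Annotation: note the asymmetry above: a held array clears ->flags
 * outright, otherwise only MD_CLOSING survives; ->persistent is
 * reset here but ->external deliberately is not, since export_rdev
 * still needs it.
 */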
6295 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6296 if (work_pending(&mddev->sync_work))
6298 if (mddev->sync_thread) {
6299 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6303 del_timer_sync(&mddev->safemode_timer);
6305 if (mddev->pers && mddev->pers->quiesce) {
6306 mddev->pers->quiesce(mddev, 1);
6307 mddev->pers->quiesce(mddev, 0);
6312 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6313 mddev->sb_flags)) {
6316 mddev->in_sync = 1;
6320 mddev->serialize_policy = 0;
6335 if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
6336 mddev->pers->quiesce(mddev, 1);
6337 mddev->pers->quiesce(mddev, 0);
6339 md_unregister_thread(mddev, &mddev->thread);
6340 if (mddev->queue)
6341 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
6346 struct md_personality *pers = mddev->pers;
6349 /* Ensure ->event_work is done */
6350 if (mddev->event_work.func)
6352 spin_lock(&mddev->lock);
6353 mddev->pers = NULL;
6354 spin_unlock(&mddev->lock);
6355 if (mddev->private)
6356 pers->free(mddev, mddev->private);
6357 mddev->private = NULL;
6358 if (pers->sync_request && mddev->to_remove == NULL)
6359 mddev->to_remove = &md_redundancy_group;
6360 module_put(pers->owner);
6361 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6363 percpu_ref_exit(&mddev->active_io);
6364 bioset_exit(&mddev->bio_set);
6365 bioset_exit(&mddev->sync_set);
6366 bioset_exit(&mddev->io_clone_set);
6371 lockdep_assert_held(&mddev->reconfig_mutex);
6374 * This is called from dm-raid
6378 percpu_ref_exit(&mddev->writes_pending);
6388 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6389 return -EBUSY;
6391 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6393 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6394 md_wakeup_thread(mddev->thread);
6396 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6397 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6403 md_wakeup_thread_directly(mddev->sync_thread);
6407 &mddev->recovery));
6408 wait_event(mddev->sb_wait,
6409 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6412 mutex_lock(&mddev->open_mutex);
6413 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6414 mddev->sync_thread ||
6415 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6417 err = -EBUSY;
6421 if (mddev->pers) {
6424 if (mddev->ro == MD_RDONLY) {
6425 err = -ENXIO;
6429 mddev->ro = MD_RDONLY;
6430 set_disk_ro(mddev->gendisk, 1);
6434 if ((mddev->pers && !err) || did_freeze) {
6435 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6436 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6437 md_wakeup_thread(mddev->thread);
6438 sysfs_notify_dirent_safe(mddev->sysfs_state);
6441 mutex_unlock(&mddev->open_mutex);
6446 * 0 - completely stop and disassemble array
6447 * 2 - stop but do not disassemble array
6452 struct gendisk *disk = mddev->gendisk;
6456 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6458 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6459 md_wakeup_thread(mddev->thread);
6461 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6462 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6468 md_wakeup_thread_directly(mddev->sync_thread);
6471 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6473 &mddev->recovery)));
6476 mutex_lock(&mddev->open_mutex);
6477 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6478 mddev->sysfs_active ||
6479 mddev->sync_thread ||
6480 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6482 mutex_unlock(&mddev->open_mutex);
6484 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6485 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6486 md_wakeup_thread(mddev->thread);
6488 return -EBUSY;
6490 if (mddev->pers) {
6498 sysfs_notify_dirent_safe(mddev->sysfs_state);
6501 if (rdev->raid_disk >= 0)
6505 mutex_unlock(&mddev->open_mutex);
6506 mddev->changed = 1;
6509 mddev->ro = MD_RDWR;
6511 mutex_unlock(&mddev->open_mutex);
6518 if (mddev->bitmap_info.file) {
6519 struct file *f = mddev->bitmap_info.file;
6520 spin_lock(&mddev->lock);
6521 mddev->bitmap_info.file = NULL;
6522 spin_unlock(&mddev->lock);
6525 mddev->bitmap_info.offset = 0;
6530 if (mddev->hold_active == UNTIL_STOP)
6531 mddev->hold_active = 0;
6534 sysfs_notify_dirent_safe(mddev->sysfs_state);
6544 if (list_empty(&mddev->disks))
6550 pr_cont("<%pg>", rdev->bdev);
6586 pr_debug("md: considering %pg ...\n", rdev0->bdev);
6591 rdev->bdev);
6592 list_move(&rdev->same_set, &candidates);
6601 rdev0->preferred_minor << MdpMinorShift);
6604 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6607 if (rdev0->preferred_minor != unit) {
6609 rdev0->bdev, rdev0->preferred_minor);
6619 else if (mddev->raid_disks || mddev->major_version
6620 || !list_empty(&mddev->disks)) {
6622 mdname(mddev), rdev0->bdev);
6626 mddev->persistent = 1;
6628 list_del_init(&rdev->same_set);
6639 list_del_init(&rdev->same_set);
6657 return -EFAULT;
6672 if (test_bit(Faulty, &rdev->flags))
6676 if (test_bit(In_sync, &rdev->flags))
6678 else if (test_bit(Journal, &rdev->flags))
6687 info.major_version = mddev->major_version;
6688 info.minor_version = mddev->minor_version;
6690 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6691 info.level = mddev->level;
6692 info.size = mddev->dev_sectors / 2;
6693 if (info.size != mddev->dev_sectors / 2) /* overflow */
6694 info.size = -1;
6696 info.raid_disks = mddev->raid_disks;
6697 info.md_minor = mddev->md_minor;
6698 info.not_persistent= !mddev->persistent;
6700 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6702 if (mddev->in_sync)
6704 if (mddev->bitmap && mddev->bitmap_info.offset)
6713 info.layout = mddev->layout;
6714 info.chunk_size = mddev->chunk_sectors << 9;
6717 return -EFAULT;
6730 return -ENOMEM;
6733 spin_lock(&mddev->lock);
6735 if (mddev->bitmap_info.file) {
6736 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6737 sizeof(file->pathname));
6741 memmove(file->pathname, ptr,
6742 sizeof(file->pathname)-(ptr-file->pathname));
6744 spin_unlock(&mddev->lock);
6748 err = -EFAULT;
6760 return -EFAULT;
6765 info.major = MAJOR(rdev->bdev->bd_dev);
6766 info.minor = MINOR(rdev->bdev->bd_dev);
6767 info.raid_disk = rdev->raid_disk;
6769 if (test_bit(Faulty, &rdev->flags))
6771 else if (test_bit(In_sync, &rdev->flags)) {
6775 if (test_bit(Journal, &rdev->flags))
6777 if (test_bit(WriteMostly, &rdev->flags))
6779 if (test_bit(FailFast, &rdev->flags))
6783 info.raid_disk = -1;
6789 return -EFAULT;
6797 dev_t dev = MKDEV(info->major,info->minor);
6800 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6803 return -EINVAL;
6806 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6807 return -EOVERFLOW;
6809 if (!mddev->raid_disks) {
6812 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6818 if (!list_empty(&mddev->disks)) {
6820 = list_entry(mddev->disks.next,
6822 err = super_types[mddev->major_version]
6823 .load_super(rdev, rdev0, mddev->minor_version);
6826 rdev->bdev,
6827 rdev0->bdev);
6829 return -EINVAL;
6843 if (mddev->pers) {
6845 if (!mddev->pers->hot_add_disk) {
6848 return -EINVAL;
6850 if (mddev->persistent)
6851 rdev = md_import_device(dev, mddev->major_version,
6852 mddev->minor_version);
6854 rdev = md_import_device(dev, -1, -1);
6861 if (!mddev->persistent) {
6862 if (info->state & (1<<MD_DISK_SYNC) &&
6863 info->raid_disk < mddev->raid_disks) {
6864 rdev->raid_disk = info->raid_disk;
6865 clear_bit(Bitmap_sync, &rdev->flags);
6867 rdev->raid_disk = -1;
6868 rdev->saved_raid_disk = rdev->raid_disk;
6870 super_types[mddev->major_version].
6872 if ((info->state & (1<<MD_DISK_SYNC)) &&
6873 rdev->raid_disk != info->raid_disk) {
6874 /* This was a hot-add request, but events don't
6878 return -EINVAL;
6881 clear_bit(In_sync, &rdev->flags); /* just to be sure */
6882 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6883 set_bit(WriteMostly, &rdev->flags);
6885 clear_bit(WriteMostly, &rdev->flags);
6886 if (info->state & (1<<MD_DISK_FAILFAST))
6887 set_bit(FailFast, &rdev->flags);
6889 clear_bit(FailFast, &rdev->flags);
6891 if (info->state & (1<<MD_DISK_JOURNAL)) {
6897 if (test_bit(Journal, &rdev2->flags)) {
6902 if (has_journal || mddev->bitmap) {
6904 return -EBUSY;
6906 set_bit(Journal, &rdev->flags);
6912 if (info->state & (1 << MD_DISK_CANDIDATE))
6913 set_bit(Candidate, &rdev->flags);
6914 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6915 /* --add initiated by this node */
6916 err = md_cluster_ops->add_new_disk(mddev, rdev);
6924 rdev->raid_disk = -1;
6931 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6933 err = md_cluster_ops->new_disk_ack(mddev,
6940 md_cluster_ops->add_new_disk_cancel(mddev);
6954 if (mddev->major_version != 0) {
6956 return -EINVAL;
6959 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6961 rdev = md_import_device(dev, -1, 0);
6967 rdev->desc_nr = info->number;
6968 if (info->raid_disk < mddev->raid_disks)
6969 rdev->raid_disk = info->raid_disk;
6971 rdev->raid_disk = -1;
6973 if (rdev->raid_disk < mddev->raid_disks)
6974 if (info->state & (1<<MD_DISK_SYNC))
6975 set_bit(In_sync, &rdev->flags);
6977 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6978 set_bit(WriteMostly, &rdev->flags);
6979 if (info->state & (1<<MD_DISK_FAILFAST))
6980 set_bit(FailFast, &rdev->flags);
6982 if (!mddev->persistent) {
6984 rdev->sb_start = bdev_nr_sectors(rdev->bdev);
6986 rdev->sb_start = calc_dev_sboffset(rdev);
6987 rdev->sectors = rdev->sb_start;
7003 if (!mddev->pers)
7004 return -ENODEV;
7008 return -ENXIO;
7010 if (rdev->raid_disk < 0)
7013 clear_bit(Blocked, &rdev->flags);
7016 if (rdev->raid_disk >= 0)
7021 if (md_cluster_ops->remove_disk(mddev, rdev))
7026 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7027 if (mddev->thread)
7028 md_wakeup_thread(mddev->thread);
7036 rdev->bdev, mdname(mddev));
7037 return -EBUSY;
7045 if (!mddev->pers)
7046 return -ENODEV;
7048 if (mddev->major_version != 0) {
7049 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
7051 return -EINVAL;
7053 if (!mddev->pers->hot_add_disk) {
7056 return -EINVAL;
7059 rdev = md_import_device(dev, -1, 0);
7063 return -EINVAL;
7066 if (mddev->persistent)
7067 rdev->sb_start = calc_dev_sboffset(rdev);
7069 rdev->sb_start = bdev_nr_sectors(rdev->bdev);
7071 rdev->sectors = rdev->sb_start;
7073 if (test_bit(Faulty, &rdev->flags)) {
7074 pr_warn("md: can not hot-add faulty %pg disk to %s!\n",
7075 rdev->bdev, mdname(mddev));
7076 err = -EINVAL;
7080 clear_bit(In_sync, &rdev->flags);
7081 rdev->desc_nr = -1;
7082 rdev->saved_raid_disk = -1;
7092 rdev->raid_disk = -1;
7094 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7095 if (!mddev->thread)
7101 if (!bdev_nowait(rdev->bdev)) {
7103 mdname(mddev), rdev->bdev);
7104 blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
7110 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7111 md_wakeup_thread(mddev->thread);
7124 if (mddev->pers) {
7125 if (!mddev->pers->quiesce || !mddev->thread)
7126 return -EBUSY;
7127 if (mddev->recovery || mddev->sync_thread)
7128 return -EBUSY;
7136 if (mddev->bitmap || mddev->bitmap_info.file)
7137 return -EEXIST; /* cannot add when bitmap is present */
7142 return -EINVAL;
7152 return -EBADF;
7155 inode = f->f_mapping->host;
7156 if (!S_ISREG(inode->i_mode)) {
7159 err = -EBADF;
7160 } else if (!(f->f_mode & FMODE_WRITE)) {
7163 err = -EBADF;
7164 } else if (atomic_read(&inode->i_writecount) != 1) {
7167 err = -EBUSY;
7173 mddev->bitmap_info.file = f;
7174 mddev->bitmap_info.offset = 0; /* file overrides offset */
7175 } else if (mddev->bitmap == NULL)
7176 return -ENOENT; /* cannot remove what isn't there */
7178 if (mddev->pers) {
7182 bitmap = md_bitmap_create(mddev, -1);
7185 mddev->bitmap = bitmap;
7191 fd = -1;
7201 struct file *f = mddev->bitmap_info.file;
7203 spin_lock(&mddev->lock);
7204 mddev->bitmap_info.file = NULL;
7205 spin_unlock(&mddev->lock);
7219 * This will always create an array with a type-0.90.0 superblock.
7222 * used to determine which style super-blocks are to be found on the devices.
7228 if (info->raid_disks == 0) {
7230 if (info->major_version < 0 ||
7231 info->major_version >= ARRAY_SIZE(super_types) ||
7232 super_types[info->major_version].name == NULL) {
7233 /* maybe try to auto-load a module? */
7235 info->major_version);
7236 return -EINVAL;
7238 mddev->major_version = info->major_version;
7239 mddev->minor_version = info->minor_version;
7240 mddev->patch_version = info->patch_version;
7241 mddev->persistent = !info->not_persistent;
7245 mddev->ctime = ktime_get_real_seconds();
7248 mddev->major_version = MD_MAJOR_VERSION;
7249 mddev->minor_version = MD_MINOR_VERSION;
7250 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7251 mddev->ctime = ktime_get_real_seconds();
7253 mddev->level = info->level;
7254 mddev->clevel[0] = 0;
7255 mddev->dev_sectors = 2 * (sector_t)info->size;
7256 mddev->raid_disks = info->raid_disks;
7260 if (info->state & (1<<MD_SB_CLEAN))
7261 mddev->recovery_cp = MaxSector;
7263 mddev->recovery_cp = 0;
7264 mddev->persistent = !info->not_persistent;
7265 mddev->external = 0;
7267 mddev->layout = info->layout;
7268 if (mddev->level == 0)
7270 mddev->layout = -1;
7271 mddev->chunk_sectors = info->chunk_size >> 9;
7273 if (mddev->persistent) {
7274 mddev->max_disks = MD_SB_DISKS;
7275 mddev->flags = 0;
7276 mddev->sb_flags = 0;
7278 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7280 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7281 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7282 mddev->bitmap_info.offset = 0;
7284 mddev->reshape_position = MaxSector;
7289 get_random_bytes(mddev->uuid, 16);
7291 mddev->new_level = mddev->level;
7292 mddev->new_chunk_sectors = mddev->chunk_sectors;
7293 mddev->new_layout = mddev->layout;
7294 mddev->delta_disks = 0;
7295 mddev->reshape_backwards = 0;
7302 lockdep_assert_held(&mddev->reconfig_mutex);
7304 if (mddev->external_size)
7307 mddev->array_sectors = array_sectors;
7316 sector_t old_dev_sectors = mddev->dev_sectors;
7318 if (mddev->pers->resize == NULL)
7319 return -EINVAL;
7329 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7330 mddev->sync_thread)
7331 return -EBUSY;
7333 return -EROFS;
7336 sector_t avail = rdev->sectors;
7341 return -ENOSPC;
7343 rv = mddev->pers->resize(mddev, num_sectors);
7346 md_cluster_ops->update_size(mddev, old_dev_sectors);
7347 else if (mddev->queue) {
7348 set_capacity_and_notify(mddev->gendisk,
7349 mddev->array_sectors);
7360 if (mddev->pers->check_reshape == NULL)
7361 return -EINVAL;
7363 return -EROFS;
7365 (mddev->max_disks && raid_disks >= mddev->max_disks))
7366 return -EINVAL;
7367 if (mddev->sync_thread ||
7368 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7369 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7370 mddev->reshape_position != MaxSector)
7371 return -EBUSY;
7374 if (mddev->raid_disks < raid_disks &&
7375 rdev->data_offset < rdev->new_data_offset)
7376 return -EINVAL;
7377 if (mddev->raid_disks > raid_disks &&
7378 rdev->data_offset > rdev->new_data_offset)
7379 return -EINVAL;
7382 mddev->delta_disks = raid_disks - mddev->raid_disks;
7383 if (mddev->delta_disks < 0)
7384 mddev->reshape_backwards = 1;
7385 else if (mddev->delta_disks > 0)
7386 mddev->reshape_backwards = 0;
7388 rv = mddev->pers->check_reshape(mddev);
7390 mddev->delta_disks = 0;
7391 mddev->reshape_backwards = 0;
7398 * on-line array.
7411 if (mddev->bitmap && mddev->bitmap_info.offset)
7414 if (mddev->major_version != info->major_version ||
7415 mddev->minor_version != info->minor_version ||
7416 /* mddev->patch_version != info->patch_version || */
7417 mddev->ctime != info->ctime ||
7418 mddev->level != info->level ||
7419 /* mddev->layout != info->layout || */
7420 mddev->persistent != !info->not_persistent ||
7421 mddev->chunk_sectors != info->chunk_size >> 9 ||
7423 ((state^info->state) & 0xfffffe00)
7425 return -EINVAL;
7427 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7429 if (mddev->raid_disks != info->raid_disks)
7431 if (mddev->layout != info->layout)
7433 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7438 return -EINVAL;
7440 if (mddev->layout != info->layout) {
7445 if (mddev->pers->check_reshape == NULL)
7446 return -EINVAL;
7448 mddev->new_layout = info->layout;
7449 rv = mddev->pers->check_reshape(mddev);
7451 mddev->new_layout = mddev->layout;
7455 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7456 rv = update_size(mddev, (sector_t)info->size * 2);
7458 if (mddev->raid_disks != info->raid_disks)
7459 rv = update_raid_disks(mddev, info->raid_disks);
7461 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7462 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7463 rv = -EINVAL;
7466 if (mddev->recovery || mddev->sync_thread) {
7467 rv = -EBUSY;
7470 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7473 if (mddev->bitmap) {
7474 rv = -EEXIST;
7477 if (mddev->bitmap_info.default_offset == 0) {
7478 rv = -EINVAL;
7481 mddev->bitmap_info.offset =
7482 mddev->bitmap_info.default_offset;
7483 mddev->bitmap_info.space =
7484 mddev->bitmap_info.default_space;
7485 bitmap = md_bitmap_create(mddev, -1);
7488 mddev->bitmap = bitmap;
7497 if (!mddev->bitmap) {
7498 rv = -ENOENT;
7501 if (mddev->bitmap->storage.file) {
7502 rv = -EINVAL;
7505 if (mddev->bitmap_info.nodes) {
7507 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7509 rv = -EPERM;
7510 md_cluster_ops->unlock_all_bitmaps(mddev);
7514 mddev->bitmap_info.nodes = 0;
7515 md_cluster_ops->leave(mddev);
7517 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7522 mddev->bitmap_info.offset = 0;
7536 if (mddev->pers == NULL)
7537 return -ENODEV;
7542 err = -ENODEV;
7545 if (test_bit(MD_BROKEN, &mddev->flags))
7546 err = -EBUSY;
7556 * dosfs just mad... ;-)
7560 struct mddev *mddev = bdev->bd_disk->private_data;
7562 geo->heads = 2;
7563 geo->sectors = 4;
7564 geo->cylinders = mddev->array_sectors / 8;
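/* Worked example (illustrative): with the fake 2-head, 4-sector
 * geometry, cylinders = capacity / (2 * 4); a 1TiB array of
 * 2147483648 sectors reports 268435456 cylinders.
 */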
7600 return -EFAULT;
7602 if (mddev->pers) {
7609 if (!list_empty(&mddev->disks)) {
7611 return -EBUSY;
7614 if (mddev->raid_disks) {
7616 return -EBUSY;
7634 return -ENOTTY;
7643 return -EACCES;
7661 mddev = bdev->bd_disk->private_data;
7666 if (!mddev->raid_disks && !mddev->external)
7667 err = -ENODEV;
7673 if (!mddev->raid_disks && !mddev->external)
7674 err = -ENODEV;
7690 /* Need to flush page cache, and ensure no-one else opens
7693 mutex_lock(&mddev->open_mutex);
7694 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7695 mutex_unlock(&mddev->open_mutex);
7696 err = -EBUSY;
7699 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7700 mutex_unlock(&mddev->open_mutex);
7701 err = -EBUSY;
7704 mutex_unlock(&mddev->open_mutex);
7724 if ((!mddev->raid_disks && !mddev->external)
7728 err = -ENODEV;
7733 * Commands even a read-only array can execute:
7753 /* We can support ADD_NEW_DISK on read-only arrays
7754 * only if we are re-adding a preexisting device.
7755 * So require mddev->pers and MD_DISK_SYNC.
7757 if (mddev->pers) {
7760 err = -EFAULT;
7762 /* Need to clear read-only for this */
7773 * superblock, so we do not allow them on read-only arrays.
7775 if (!md_is_rdwr(mddev) && mddev->pers) {
7776 if (mddev->ro != MD_AUTO_READ) {
7777 err = -EROFS;
7780 mddev->ro = MD_RDWR;
7781 sysfs_notify_dirent_safe(mddev->sysfs_state);
7782 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7784 /* If a device failed while we were read-only, we
7787 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7789 wait_event(mddev->sb_wait,
7790 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7791 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7801 err = -EFAULT;
7809 md_cluster_ops->new_disk_ack(mddev, false);
7811 err = -EINVAL;
7827 err = -EINVAL;
7832 if (mddev->hold_active == UNTIL_IOCTL &&
7833 err != -EINVAL)
7834 mddev->hold_active = 0;
7838 clear_bit(MD_CLOSING, &mddev->flags);
7863 struct mddev *mddev = bdev->bd_disk->private_data;
7870 if (!mddev->raid_disks && !mddev->external) {
7871 err = -ENODEV;
7876 * Transitioning to read-auto need only happen for arrays that call
7879 if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
7883 mddev->ro = MD_AUTO_READ;
7897 mddev = mddev_get(disk->private_data);
7900 return -ENODEV;
7902 err = mutex_lock_interruptible(&mddev->open_mutex);
7906 err = -ENODEV;
7907 if (test_bit(MD_CLOSING, &mddev->flags))
7910 atomic_inc(&mddev->openers);
7911 mutex_unlock(&mddev->open_mutex);
7917 mutex_unlock(&mddev->open_mutex);
7925 struct mddev *mddev = disk->private_data;
7928 atomic_dec(&mddev->openers);
7934 struct mddev *mddev = disk->private_data;
7937 if (mddev->changed)
7939 mddev->changed = 0;
7945 struct mddev *mddev = disk->private_data;
7947 percpu_ref_exit(&mddev->writes_pending);
7972 * md_thread is a 'system-thread', its priority should be very
7987 * we don't add to the load-average.
7995 (thread->wqueue,
7996 test_bit(THREAD_WAKEUP, &thread->flags)
7998 thread->timeout);
8000 clear_bit(THREAD_WAKEUP, &thread->flags);
8004 thread->run(thread);
8017 wake_up_process(t->tsk);
8028 pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
8029 set_bit(THREAD_WAKEUP, &t->flags);
8030 wake_up(&t->wqueue);
8045 init_waitqueue_head(&thread->wqueue);
8047 thread->run = run;
8048 thread->mddev = mddev;
8049 thread->timeout = MAX_SCHEDULE_TIMEOUT;
8050 thread->tsk = kthread_run(md_thread, thread,
8052 mdname(thread->mddev),
8054 if (IS_ERR(thread->tsk)) {
8065 lockdep_is_held(&mddev->reconfig_mutex));
8073 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
8074 kthread_stop(thread->tsk);
8081 if (!rdev || test_bit(Faulty, &rdev->flags))
8084 if (!mddev->pers || !mddev->pers->error_handler)
8086 mddev->pers->error_handler(mddev, rdev);
8088 if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
8091 if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
8092 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8093 sysfs_notify_dirent_safe(rdev->sysfs_state);
8094 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8095 if (!test_bit(MD_BROKEN, &mddev->flags)) {
8096 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8097 md_wakeup_thread(mddev->thread);
8099 if (mddev->event_work.func)
8100 queue_work(md_misc_wq, &mddev->event_work);
8116 seq_printf(seq, "%pg ", rdev->bdev);
8131 seq_printf(seq, "[%s] ", pers->name);
8145 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8146 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8147 max_sectors = mddev->resync_max_sectors;
8149 max_sectors = mddev->dev_sectors;
8151 resync = mddev->curr_resync;
8153 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8159 res = atomic_read(&mddev->recovery_active);
8165 if (resync < res || resync - res < MD_RESYNC_ACTIVE)
8168 resync -= res;
8172 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8176 if (rdev->raid_disk >= 0 &&
8177 !test_bit(Faulty, &rdev->flags) &&
8178 rdev->recovery_offset != MaxSector &&
8179 rdev->recovery_offset) {
8183 if (mddev->reshape_position != MaxSector)
8189 if (mddev->recovery_cp < MaxSector) {
8216 int i, x = per_milli/50, y = 20-x;
8226 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8228 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8230 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8253 dt = ((jiffies - mddev->resync_mark) / HZ);
8256 curr_mark_cnt = mddev->curr_mark_cnt;
8257 recovery_active = atomic_read(&mddev->recovery_active);
8258 resync_mark_cnt = mddev->resync_mark_cnt;
8261 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8263 rt = max_sectors - resync; /* number of remaining sectors */
8278 seq->poll_event = atomic_read(&md_event_count);
8302 err = md_bitmap_get_stats(mddev->bitmap, &stats);
8306 chunk_kb = mddev->bitmap_info.chunksize >> 10;
8307 used_pages = stats.pages - stats.missing_pages;
8310 used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10),
8311 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
8342 mutex_lock(&mddev->bitmap_info.mutex);
8344 spin_lock(&mddev->lock);
8345 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8347 mddev->pers ? "" : "in");
8348 if (mddev->pers) {
8349 if (mddev->ro == MD_RDONLY)
8350 seq_printf(seq, " (read-only)");
8351 if (mddev->ro == MD_AUTO_READ)
8352 seq_printf(seq, " (auto-read-only)");
8353 seq_printf(seq, " %s", mddev->pers->name);
8359 seq_printf(seq, " %pg[%d]", rdev->bdev, rdev->desc_nr);
8361 if (test_bit(WriteMostly, &rdev->flags))
8363 if (test_bit(Journal, &rdev->flags))
8365 if (test_bit(Faulty, &rdev->flags)) {
8369 if (rdev->raid_disk < 0)
8371 if (test_bit(Replacement, &rdev->flags))
8373 sectors += rdev->sectors;
8377 if (!list_empty(&mddev->disks)) {
8378 if (mddev->pers)
8381 mddev->array_sectors / 2);
8386 if (mddev->persistent) {
8387 if (mddev->major_version != 0 ||
8388 mddev->minor_version != 90) {
8390 mddev->major_version,
8391 mddev->minor_version);
8393 } else if (mddev->external)
8395 mddev->metadata_type);
8397 seq_printf(seq, " super non-persistent");
8399 if (mddev->pers) {
8400 mddev->pers->status(seq, mddev);
8402 if (mddev->pers->sync_request) {
8413 spin_unlock(&mddev->lock);
8414 mutex_unlock(&mddev->bitmap_info.mutex);
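/* Illustrative /proc/mdstat entry assembled by the code above
 * (device list, size and status, then resync progress):
 *   md0 : active raid1 sdb1[1] sda1[0]
 *         1048512 blocks [2/2] [UU]
 *         [===>.................]  resync = 15.5% (162688/1048512) finish=0.3min speed=54229K/sec
 */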
8420 if (atomic_dec_and_test(&mddev->active))
8442 seq = file->private_data;
8443 seq->poll_event = atomic_read(&md_event_count);
8450 struct seq_file *seq = filp->private_data;
8460 if (seq->poll_event != atomic_read(&md_event_count))
8476 p->name, p->level);
8478 list_add_tail(&p->list, &pers_list);
8486 pr_debug("md: %s personality unregistered\n", p->name);
8488 list_del_init(&p->list);
8500 ret = -EALREADY;
8523 request_module("md-cluster");
8527 pr_warn("can't find md-cluster module or get its reference.\n");
8529 return -ENOENT;
8533 ret = md_cluster_ops->join(mddev, nodes);
8535 mddev->safemode_delay = 0;
8543 md_cluster_ops->leave(mddev);
8556 struct gendisk *disk = rdev->bdev->bd_disk;
8557 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8558 atomic_read(&disk->sync_io);
8564 * non-sync IO will cause disk_stat to increase without
8568 * the array to appear non-idle, and resync will slow
8572 * completing might cause the array to appear non-idle
8574 * not have been non-resync activity. This will only
8581 if (init || curr_events - rdev->last_events > 64) {
8582 rdev->last_events = curr_events;
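/* Annotation: a member disk counts as busy only when its sector
 * counter has moved more than 64 sectors beyond what resync itself
 * submitted since the last check; the slack absorbs in-flight sync
 * requests that were started but not yet accounted for.
 */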
8593 atomic_sub(blocks, &mddev->recovery_active);
8594 wake_up(&mddev->recovery_wait);
8596 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8597 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8598 md_wakeup_thread(mddev->thread);
8618 BUG_ON(mddev->ro == MD_RDONLY);
8619 if (mddev->ro == MD_AUTO_READ) {
8621 mddev->ro = MD_RDWR;
8622 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8623 md_wakeup_thread(mddev->thread);
8624 md_wakeup_thread(mddev->sync_thread);
8628 percpu_ref_get(&mddev->writes_pending);
8630 if (mddev->safemode == 1)
8631 mddev->safemode = 0;
8632 /* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8633 if (mddev->in_sync || mddev->sync_checkers) {
8634 spin_lock(&mddev->lock);
8635 if (mddev->in_sync) {
8636 mddev->in_sync = 0;
8637 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8638 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8639 md_wakeup_thread(mddev->thread);
8642 spin_unlock(&mddev->lock);
8646 sysfs_notify_dirent_safe(mddev->sysfs_state);
8647 if (!mddev->has_superblocks)
8649 wait_event(mddev->sb_wait,
8650 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8652 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8653 percpu_ref_put(&mddev->writes_pending);
8672 WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
8673 percpu_ref_get(&mddev->writes_pending);
8679 percpu_ref_put(&mddev->writes_pending);
8681 if (mddev->safemode == 2)
8682 md_wakeup_thread(mddev->thread);
8683 else if (mddev->safemode_delay)
8685 * every ->safemode_delay jiffies
8687 mod_timer(&mddev->safemode_timer,
8688 roundup(jiffies, mddev->safemode_delay) +
8689 mddev->safemode_delay);
8700 if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
8706 if (mddev->gendisk)
8708 disk_devt(mddev->gendisk),
8709 bio->bi_iter.bi_sector);
8717 if (mddev->pers->bitmap_sector)
8718 mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
8719 &md_io_clone->sectors);
8721 md_bitmap_startwrite(mddev->bitmap, md_io_clone->offset,
8722 md_io_clone->sectors);
8727 md_bitmap_endwrite(mddev->bitmap, md_io_clone->offset,
8728 md_io_clone->sectors);
8733 struct md_io_clone *md_io_clone = bio->bi_private;
8734 struct bio *orig_bio = md_io_clone->orig_bio;
8735 struct mddev *mddev = md_io_clone->mddev;
8737 if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
8740 if (bio->bi_status && !orig_bio->bi_status)
8741 orig_bio->bi_status = bio->bi_status;
8743 if (md_io_clone->start_time)
8744 bio_end_io_acct(orig_bio, md_io_clone->start_time);
8748 percpu_ref_put(&mddev->active_io);
8753 struct block_device *bdev = (*bio)->bi_bdev;
8756 bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
8759 md_io_clone->orig_bio = *bio;
8760 md_io_clone->mddev = mddev;
8761 if (blk_queue_io_stat(bdev->bd_disk->queue))
8762 md_io_clone->start_time = bio_start_io_acct(*bio);
8764 if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
8765 md_io_clone->offset = (*bio)->bi_iter.bi_sector;
8766 md_io_clone->sectors = bio_sectors(*bio);
8770 clone->bi_end_io = md_end_clone_io;
8771 clone->bi_private = md_io_clone;
8777 percpu_ref_get(&mddev->active_io);
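The clone path above follows a common block-layer pattern: wrap the incoming bio in a per-I/O context, let the clone carry the work, and complete the original from the clone's end_io callback while propagating the first error seen. A userspace sketch of that shape, with struct request standing in for struct bio and io_clone for struct md_io_clone (both illustrative):

#include <stdio.h>
#include <stdlib.h>

struct request {
	int status;			/* 0 = ok, nonzero = error */
	void (*end_io)(struct request *);
	void *private;
};

struct io_clone {
	struct request *orig;
	struct request clone;
};

static void end_orig(struct request *r)
{
	printf("orig completed, status %d\n", r->status);
}

static void end_clone(struct request *clone)
{
	struct io_clone *ic = clone->private;

	/* propagate the first error only; never overwrite one */
	if (clone->status && !ic->orig->status)
		ic->orig->status = clone->status;
	ic->orig->end_io(ic->orig);
	free(ic);
}

static void submit(struct request *orig)
{
	struct io_clone *ic = calloc(1, sizeof(*ic));

	if (!ic)
		return;
	ic->orig = orig;
	ic->clone.end_io = end_clone;
	ic->clone.private = ic;
	ic->clone.status = 5;		/* pretend the lower layer failed */
	ic->clone.end_io(&ic->clone);	/* "completion" fires */
}

int main(void)
{
	struct request orig = { .status = 0, .end_io = end_orig };

	submit(&orig);			/* prints: orig completed, status 5 */
	return 0;
}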
8790 if (!mddev->pers)
8794 if (!mddev->pers->sync_request)
8797 spin_lock(&mddev->lock);
8798 if (mddev->in_sync) {
8799 mddev->in_sync = 0;
8800 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8801 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8802 if (mddev->safemode_delay &&
8803 mddev->safemode == 0)
8804 mddev->safemode = 1;
8805 spin_unlock(&mddev->lock);
8807 sysfs_notify_dirent_safe(mddev->sysfs_state);
8809 wait_event(mddev->sb_wait,
8810 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8812 spin_unlock(&mddev->lock);
8821 struct mddev *mddev = thread->mddev;
8837 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8838 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8840 if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
8841 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8846 ret = md_cluster_ops->resync_start(mddev);
8850 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8851 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8852 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8853 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8854 && ((unsigned long long)mddev->curr_resync_completed
8855 < (unsigned long long)mddev->resync_max_sectors))
8859 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8860 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8861 desc = "data-check";
8863 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8864 desc = "requested-resync";
8868 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8873 mddev->last_sync_action = action ?: desc;
8886 int mddev2_minor = -1;
8887 mddev->curr_resync = MD_RESYNC_DELAYED;
8890 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8894 if (test_bit(MD_DELETED, &mddev2->flags))
8898 if (!mddev->parallel_resync
8899 && mddev2->curr_resync
8903 mddev->curr_resync == MD_RESYNC_DELAYED) {
8905 mddev->curr_resync = MD_RESYNC_YIELDED;
8909 mddev->curr_resync == MD_RESYNC_YIELDED)
8919 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8920 mddev2->curr_resync >= mddev->curr_resync) {
8921 if (mddev2_minor != mddev2->md_minor) {
8922 mddev2_minor = mddev2->md_minor;
8939 } while (mddev->curr_resync < MD_RESYNC_DELAYED);
8942 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8946 max_sectors = mddev->resync_max_sectors;
8947 atomic64_set(&mddev->resync_mismatches, 0);
8949 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8950 j = mddev->resync_min;
8951 else if (!mddev->bitmap)
8952 j = mddev->recovery_cp;
8954 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8955 max_sectors = mddev->resync_max_sectors;
8962 mddev->reshape_position != MaxSector)
8963 j = mddev->reshape_position;
8966 max_sectors = mddev->dev_sectors;
8970 if (rdev->raid_disk >= 0 &&
8971 !test_bit(Journal, &rdev->flags) &&
8972 !test_bit(Faulty, &rdev->flags) &&
8973 !test_bit(In_sync, &rdev->flags) &&
8974 rdev->recovery_offset < j)
8975 j = rdev->recovery_offset;
8986 if (mddev->bitmap) {
8987 mddev->pers->quiesce(mddev, 1);
8988 mddev->pers->quiesce(mddev, 0);
9005 mddev->resync_mark = mark[last_mark];
9006 mddev->resync_mark_cnt = mark_cnt[last_mark];
9015 atomic_set(&mddev->recovery_active, 0);
9021 mddev->curr_resync = j;
9023 mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */
9024 mddev->curr_resync_completed = j;
9025 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9035 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9036 ((mddev->curr_resync > mddev->curr_resync_completed &&
9037 (mddev->curr_resync - mddev->curr_resync_completed)
9040 (j - mddev->curr_resync_completed)*2
9041 >= mddev->resync_max - mddev->curr_resync_completed ||
9042 mddev->curr_resync_completed > mddev->resync_max
9045 wait_event(mddev->recovery_wait,
9046 atomic_read(&mddev->recovery_active) == 0);
9047 mddev->curr_resync_completed = j;
9048 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
9049 j > mddev->recovery_cp)
9050 mddev->recovery_cp = j;
9052 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
9053 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9056 while (j >= mddev->resync_max &&
9057 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9058 /* As this condition is controlled by user-space,
9063 wait_event_interruptible(mddev->recovery_wait,
9064 mddev->resync_max > j
9066 &mddev->recovery));
9069 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9072 sectors = mddev->pers->sync_request(mddev, j, &skipped);
9074 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9080 atomic_add(sectors, &mddev->recovery_active);
9083 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9091 mddev->curr_resync = j;
9092 mddev->curr_mark_cnt = io_sectors;
9108 mddev->resync_mark = mark[next];
9109 mddev->resync_mark_cnt = mark_cnt[next];
9111 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
9115 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9120 * This throttle loop exits only once we drop below the 'hard' speed
9122 * limit or the system goes I/O-idle; CPU load alone does not matter,
9128 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
9129 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt)) / 2 /
9130 ((jiffies - mddev->resync_mark) / HZ + 1) + 1;
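This expression is the whole speed gauge: sectors are 512 bytes, so halving converts to KiB; dividing by whole elapsed seconds since the last mark (+1 so a fresh mark cannot divide by zero) gives KiB/s; and the trailing +1 keeps the result nonzero. When this speed exceeds the soft limit, the elided lines back off, either sleeping or waiting on recovery_wait as shown below. A worked standalone version of the computation, assuming HZ=250:

#include <stdio.h>

#define HZ 250UL

static unsigned long currspeed(unsigned long done_sectors,
			       unsigned long mark_sectors,
			       unsigned long now_jiffies,
			       unsigned long mark_jiffies)
{
	return (done_sectors - mark_sectors) / 2 /
	       ((now_jiffies - mark_jiffies) / HZ + 1) + 1;
}

int main(void)
{
	/* 409600 sectors (200 MiB) in 2 s of jiffies since the mark */
	unsigned long v = currspeed(409600, 0, 2 * HZ, 0);

	/* (409600/2) KiB / (2+1) s + 1 = 68267 KiB/s */
	printf("%lu KiB/s\n", v);
	return 0;
}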
9142 wait_event(mddev->recovery_wait,
9143 !atomic_read(&mddev->recovery_active));
9148 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
9154 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
9156 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9157 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9158 mddev->curr_resync >= MD_RESYNC_ACTIVE) {
9159 mddev->curr_resync_completed = mddev->curr_resync;
9160 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9162 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9164 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9165 mddev->curr_resync > MD_RESYNC_ACTIVE) {
9166 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9167 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9168 if (mddev->curr_resync >= mddev->recovery_cp) {
9172 &mddev->recovery))
9173 mddev->recovery_cp =
9174 mddev->curr_resync_completed;
9176 mddev->recovery_cp =
9177 mddev->curr_resync;
9180 mddev->recovery_cp = MaxSector;
9182 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9183 mddev->curr_resync = MaxSector;
9184 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9185 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9188 if (rdev->raid_disk >= 0 &&
9189 mddev->delta_disks >= 0 &&
9190 !test_bit(Journal, &rdev->flags) &&
9191 !test_bit(Faulty, &rdev->flags) &&
9192 !test_bit(In_sync, &rdev->flags) &&
9193 rdev->recovery_offset < mddev->curr_resync)
9194 rdev->recovery_offset = mddev->curr_resync;
9203 set_mask_bits(&mddev->sb_flags, 0,
9206 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9207 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9208 mddev->delta_disks > 0 &&
9209 mddev->pers->finish_reshape &&
9210 mddev->pers->size &&
9211 mddev->queue) {
9213 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9216 set_capacity_and_notify(mddev->gendisk,
9217 mddev->array_sectors);
9220 spin_lock(&mddev->lock);
9221 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9223 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9224 mddev->resync_min = 0;
9225 mddev->resync_max = MaxSector;
9226 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9227 mddev->resync_min = mddev->curr_resync_completed;
9228 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9229 mddev->curr_resync = MD_RESYNC_NONE;
9230 spin_unlock(&mddev->lock);
9233 wake_up(&mddev->sb_wait);
9234 md_wakeup_thread(mddev->thread);
9247 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9253 rdev->raid_disk >= 0 &&
9254 !test_bit(Blocked, &rdev->flags) &&
9255 test_bit(Faulty, &rdev->flags) &&
9256 atomic_read(&rdev->nr_pending) == 0) {
9257 /* Faulty non-Blocked devices with nr_pending == 0 can never gain new references, so one synchronize_rcu() covers them all.
9263 set_bit(RemoveSynchronized, &rdev->flags);
9271 rdev->raid_disk >= 0 &&
9272 !test_bit(Blocked, &rdev->flags) &&
9273 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9274 (!test_bit(In_sync, &rdev->flags) &&
9275 !test_bit(Journal, &rdev->flags))) &&
9276 atomic_read(&rdev->nr_pending) == 0)) {
9277 if (mddev->pers->hot_remove_disk(
9280 rdev->saved_raid_disk = rdev->raid_disk;
9281 rdev->raid_disk = -1;
9285 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9286 clear_bit(RemoveSynchronized, &rdev->flags);
9289 if (removed && mddev->kobj.sd)
9290 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
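Removal above is two-pass: the first loop (under RCU) tags Faulty, unblocked, reference-free devices with RemoveSynchronized, a single synchronize_rcu() then covers them all, and the second loop actually calls ->hot_remove_disk(). A compact sketch of the second pass's predicate, with the flag bits modelled as a plain bitmask (layout illustrative):

#include <stdbool.h>
#include <stdio.h>

enum { FAULTY = 1, BLOCKED = 2, IN_SYNC = 4, JOURNAL = 8,
       REMOVE_SYNCHRONIZED = 16 };

struct disk {
	int raid_disk;		/* slot, or -1 if unused */
	int nr_pending;		/* in-flight I/O still referencing it */
	unsigned int flags;
};

/* second-pass test: may ->hot_remove_disk() take this device? */
static bool can_remove(const struct disk *d)
{
	if (d->raid_disk < 0 || (d->flags & BLOCKED) || d->nr_pending)
		return false;
	return (d->flags & REMOVE_SYNCHRONIZED) ||
	       !(d->flags & (IN_SYNC | JOURNAL));
}

int main(void)
{
	struct disk failed = { 0, 0, FAULTY | REMOVE_SYNCHRONIZED };
	struct disk busy   = { 1, 3, FAULTY };

	printf("%d %d\n", can_remove(&failed), can_remove(&busy));
	return 0;	/* prints: 1 0 */
}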
9298 if (test_bit(Candidate, &rdev->flags))
9300 if (rdev->raid_disk >= 0 &&
9301 !test_bit(In_sync, &rdev->flags) &&
9302 !test_bit(Journal, &rdev->flags) &&
9303 !test_bit(Faulty, &rdev->flags))
9305 if (rdev->raid_disk >= 0)
9307 if (test_bit(Faulty, &rdev->flags))
9309 if (!test_bit(Journal, &rdev->flags)) {
9311 !(rdev->saved_raid_disk >= 0 &&
9312 !test_bit(Bitmap_sync, &rdev->flags)))
9315 rdev->recovery_offset = 0;
9317 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9320 if (!test_bit(Journal, &rdev->flags))
9323 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9328 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
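The add loop above is equally selective: a slotless, non-Faulty device qualifies for ->hot_add_disk() only when the array is read-write, or when it is a former member whose remembered slot and bitmap state make a cheap re-add safe; journal devices bypass that rule. A sketch of that eligibility test under the same kind of illustrative flag layout:

#include <stdbool.h>
#include <stdio.h>

enum { FAULTY = 1, JOURNAL = 2, BITMAP_SYNC = 4 };

struct disk {
	int raid_disk;		/* -1: currently holds no slot */
	int saved_raid_disk;	/* former slot, or -1 */
	unsigned int flags;
};

static bool addable(const struct disk *d, bool array_rdwr)
{
	if (d->raid_disk >= 0 || (d->flags & FAULTY))
		return false;
	if (d->flags & JOURNAL)		/* journals skip the re-add rule */
		return true;
	return array_rdwr ||
	       (d->saved_raid_disk >= 0 && !(d->flags & BITMAP_SYNC));
}

int main(void)
{
	struct disk fresh  = { -1, -1, 0 };
	struct disk former = { -1,  2, 0 };

	/* on a read-only array only the former member qualifies */
	printf("%d %d\n", addable(&fresh, false), addable(&former, false));
	return 0;	/* prints: 0 1 */
}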
9336 rcu_assign_pointer(mddev->sync_thread,
9338 if (!mddev->sync_thread) {
9342 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9343 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9344 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9345 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9346 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9349 &mddev->recovery))
9350 if (mddev->sysfs_action)
9351 sysfs_notify_dirent_safe(mddev->sysfs_action);
9353 md_wakeup_thread(mddev->sync_thread);
9354 sysfs_notify_dirent_safe(mddev->sysfs_action);
9359 * This routine is regularly called by all per-raid-array threads to
9360 * deal with generic issues like resync and super-block update.
9367 * set MD_RECOVERY_RUNNING in "->recovery" and create a sync thread at ->sync_thread.
9378 * 6/ If array has spares or is not in-sync, start a resync thread.
9382 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9383 /* Write superblock - the thread that called mddev_suspend() holds reconfig_mutex for us.
9386 set_bit(MD_UPDATING_SB, &mddev->flags);
9388 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9390 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9391 wake_up(&mddev->sb_wait);
9397 if (mddev->bitmap)
9401 if (mddev->pers->sync_request && !mddev->external) {
9404 mddev->safemode = 2;
9410 !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9413 (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) ||
9414 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9415 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9416 (mddev->external == 0 && mddev->safemode == 1) ||
9417 (mddev->safemode == 2
9418 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9424 bool try_set_sync = mddev->safemode != 0;
9426 if (!mddev->external && mddev->safemode == 1)
9427 mddev->safemode = 0;
9431 if (!mddev->external && mddev->in_sync)
9438 clear_bit(Blocked, &rdev->flags);
9439 /* On a read-only array we can:
9440 * - remove failed devices
9441 * - add already-in_sync devices if the array itself
9442 * is in-sync.
9443 * As we only add devices that are already in-sync, the spares can be
9448 * activated immediately: just call ->spare_active and clear saved_raid_disk.
9450 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9452 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9453 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9454 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9464 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9465 rdev->raid_disk < 0)
9470 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9471 spin_lock(&mddev->lock);
9473 spin_unlock(&mddev->lock);
9476 if (mddev->sb_flags)
9483 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
9484 if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9486 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9490 if (WARN_ON_ONCE(!mddev->sync_thread))
9500 mddev->curr_resync_completed = 0;
9501 spin_lock(&mddev->lock);
9502 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9503 spin_unlock(&mddev->lock);
9507 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9508 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9510 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9511 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9516 * Spares are also removed and re-added, to allow
9517 * the personality to fail the re-add.
9520 if (mddev->reshape_position != MaxSector) {
9521 if (mddev->pers->check_reshape == NULL ||
9522 mddev->pers->check_reshape(mddev) != 0)
9525 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9526 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9528 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9529 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9530 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9531 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9532 } else if (mddev->recovery_cp < MaxSector) {
9533 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9534 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9535 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9539 if (mddev->pers->sync_request) {
9545 md_bitmap_write_all(mddev->bitmap);
9547 queue_work(md_misc_wq, &mddev->sync_work);
9551 if (!mddev->sync_thread) {
9552 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9555 &mddev->recovery))
9556 if (mddev->sysfs_action)
9557 sysfs_notify_dirent_safe(mddev->sysfs_action);
9560 wake_up(&mddev->sb_wait);
9569 sector_t old_dev_sectors = mddev->dev_sectors;
9573 md_unregister_thread(mddev, &mddev->sync_thread);
9574 atomic_inc(&mddev->sync_seq);
9576 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9577 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9578 mddev->degraded != mddev->raid_disks) {
9581 if (mddev->pers->spare_active(mddev)) {
9582 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9583 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9586 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9587 mddev->pers->finish_reshape) {
9588 mddev->pers->finish_reshape(mddev);
9593 /* If the array is no longer degraded, any saved_raid_disk information must be scrapped.
9596 if (!mddev->degraded)
9598 rdev->saved_raid_disk = -1;
9604 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9605 md_cluster_ops->resync_finish(mddev);
9606 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9607 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9608 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9609 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9610 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9611 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9613 * We call md_cluster_ops->update_size here because sync_size could have been changed by md_update_sb; it is time to propagate the new size across the cluster.
9618 && !test_bit(MD_CLOSING, &mddev->flags))
9619 md_cluster_ops->update_size(mddev, old_dev_sectors);
9621 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9622 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9623 sysfs_notify_dirent_safe(mddev->sysfs_action);
9625 if (mddev->event_work.func)
9626 queue_work(md_misc_wq, &mddev->event_work);
9633 sysfs_notify_dirent_safe(rdev->sysfs_state);
9634 wait_event_timeout(rdev->blocked_wait,
9635 !test_bit(Blocked, &rdev->flags) &&
9636 !test_bit(BlockedBadBlocks, &rdev->flags),
9648 if (rdev->data_offset > rdev->new_data_offset)
9649 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9651 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9652 rdev->data_offset = rdev->new_data_offset;
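The fix-up above keeps rdev->sectors consistent when a reshape moves the data area: shifting data_offset down adds the reclaimed sectors, shifting it up subtracts them, and the explicit branch keeps the unsigned arithmetic from wrapping. A worked standalone example of the same adjustment:

#include <stdio.h>

struct dev {
	unsigned long long data_offset, new_data_offset, sectors;
};

static void finish_reshape(struct dev *d)
{
	if (d->data_offset > d->new_data_offset)
		d->sectors += d->data_offset - d->new_data_offset;
	else
		d->sectors -= d->new_data_offset - d->data_offset;
	d->data_offset = d->new_data_offset;
}

int main(void)
{
	/* data area moved up by 2048 sectors: usable space shrinks */
	struct dev d = { .data_offset = 2048, .new_data_offset = 4096,
			 .sectors = 1000000 };

	finish_reshape(&d);
	printf("offset=%llu sectors=%llu\n", d.data_offset, d.sectors);
	return 0;	/* prints: offset=4096 sectors=997952 */
}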
9663 struct mddev *mddev = rdev->mddev;
9666 s += rdev->new_data_offset;
9668 s += rdev->data_offset;
9669 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9672 if (test_bit(ExternalBbl, &rdev->flags))
9673 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9674 sysfs_notify_dirent_safe(rdev->sysfs_state);
9675 set_mask_bits(&mddev->sb_flags, 0,
9677 md_wakeup_thread(rdev->mddev->thread);
9689 s += rdev->new_data_offset;
9691 s += rdev->data_offset;
9692 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9693 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9694 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
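Both badblocks helpers above translate before they touch the table: the caller passes a sector relative to the start of the data area (old or new offset, depending on is_new), and the range is recorded device-absolute so it stays valid when a reshape moves data_offset. A toy sketch of that translation with a one-entry store (the real kernel table lives in struct badblocks):

#include <stdio.h>

struct dev {
	unsigned long long data_offset, new_data_offset;
	unsigned long long bad_start, bad_len;	/* toy one-entry table */
};

static void set_badblocks(struct dev *d, unsigned long long s,
			  unsigned long long sectors, int is_new)
{
	s += is_new ? d->new_data_offset : d->data_offset;
	d->bad_start = s;	/* stored device-absolute */
	d->bad_len = sectors;
}

int main(void)
{
	struct dev d = { .data_offset = 2048, .new_data_offset = 4096 };

	set_badblocks(&d, 100, 8, 0);
	printf("bad range at %llu+%llu\n", d.bad_start, d.bad_len);
	return 0;	/* prints: bad range at 2148+8 */
}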
9711 if (mddev->pers)
9713 if (mddev->persistent)
9714 mddev->safemode = 2;
9750 int ret = -ENOMEM;
9794 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9802 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9803 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9805 pr_info("md-cluster: resize failed\n");
9807 md_bitmap_update_sb(mddev->bitmap);
9812 if (test_bit(Faulty, &rdev2->flags))
9816 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9818 if (test_bit(Candidate, &rdev2->flags)) {
9821 rdev2->bdev);
9826 clear_bit(Candidate, &rdev2->flags);
9829 if (role != rdev2->raid_disk) {
9833 if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
9834 !(le32_to_cpu(sb->feature_map) &
9836 rdev2->saved_raid_disk = role;
9839 rdev2->bdev);
9840 /* wake up mddev->thread here, so the array can resync with the newly activated disk
9842 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9843 md_wakeup_thread(mddev->thread);
9853 clear_bit(Blocked, &rdev2->flags);
9858 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9859 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9865 * Since mddev->delta_disks has already updated in update_raid_disks,
9868 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9869 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9874 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9875 if (mddev->pers->update_reshape_pos)
9876 mddev->pers->update_reshape_pos(mddev);
9877 if (mddev->pers->start_reshape)
9878 mddev->pers->start_reshape(mddev);
9879 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9880 mddev->reshape_position != MaxSector &&
9881 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9883 mddev->reshape_position = MaxSector;
9884 if (mddev->pers->update_reshape_pos)
9885 mddev->pers->update_reshape_pos(mddev);
9889 mddev->events = le64_to_cpu(sb->events);
9895 struct page *swapout = rdev->sb_page;
9901 rdev->sb_page = NULL;
9904 ClearPageUptodate(rdev->sb_page);
9905 rdev->sb_loaded = 0;
9906 err = super_types[mddev->major_version].
9907 load_super(rdev, NULL, mddev->minor_version);
9911 __func__, __LINE__, rdev->desc_nr, err);
9912 if (rdev->sb_page)
9913 put_page(rdev->sb_page);
9914 rdev->sb_page = swapout;
9915 rdev->sb_loaded = 1;
9919 sb = page_address(rdev->sb_page);
9924 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9925 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9928 * The other node finished recovery: call spare_active to set the device In_sync and update mddev->degraded.
9930 if (rdev->recovery_offset == MaxSector &&
9931 !test_bit(In_sync, &rdev->flags) &&
9932 mddev->pers->spare_active(mddev))
9933 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9946 if (iter->desc_nr == nr) {
9965 if (!test_bit(Faulty, &rdev->flags))
9991 node_detected_dev->dev = dev;
9993 list_add_tail(&node_detected_dev->list, &all_detected_devices);
10015 list_del(&node_detected_dev->list);
10016 dev = node_detected_dev->dev;
10024 if (test_bit(Faulty, &rdev->flags))
10027 set_bit(AutoDetected, &rdev->flags);
10028 list_add(&rdev->same_set, &pending_raid_disks);
10051 * waiting for us in select() or poll() - wake them up
10068 mddev->ctime = 0;
10069 mddev->hold_active = 0;