Lines Matching +full:conf +full:- +full:pu
1 // SPDX-License-Identifier: GPL-2.0-or-later
10 - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar
11 - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com>
12 - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net>
13 - kerneld support by Boris Tobotras <boris@xtalk.msk.su>
14 - kmod support by: Cyrus Durgin
15 - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com>
16 - Devfs support by Richard Gooch <rgooch@atnf.csiro.au>
18 - lots of fixes and improvements to the RAID1/RAID5 and generic
23 - persistent bitmap code
24 Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
44 #include <linux/blk-integrity.h>
65 #include <linux/percpu-refcount.h>
70 #include "md-bitmap.h"
71 #include "md-cluster.h"
103 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
108 * speed limit - in case reconstruction slows down your system despite
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
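The two conditional returns above are the bodies of md's per-array speed-limit helpers. A hedged reconstruction of the pair (a per-array value of 0 means "unset", so each helper falls back to the system-wide knobs exposed as /proc/sys/dev/raid/speed_limit_min and speed_limit_max):

static inline int speed_min(struct mddev *mddev)
{
	/* per-array override if set, else the global sysctl */
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}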
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
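A worked instance of the expression above, assuming the common configuration:

/* With PAGE_SHIFT == 12 (4 KiB pages) and sizeof(atomic_t) == 4, so
 * ilog2(sizeof(atomic_t)) == 2, serial_nums = 1 << (12 - 2) = 1024:
 * as many serial trees as atomic_t-sized slots fit in one page. */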
152 if (test_bit(CollisionCheck, &rdev->flags))
158 return -ENOMEM;
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
186 if (ret && !mddev->serial_info_pool)
194 * 1. it is a multi-queue device flagged with writemostly.
195 * 2. the write-behind mode is enabled.
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
215 !test_bit(CollisionCheck, &rdev->flags))
228 if (mddev->serial_info_pool == NULL) {
233 mddev->serial_info_pool =
236 if (!mddev->serial_info_pool) {
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
259 if (mddev->serial_info_pool) {
267 if (!mddev->serialize_policy ||
273 test_bit(CollisionCheck, &temp->flags))
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
315 * a device node in /dev and to open it. This causes races with device-close.
352 * We hold a refcount over the call to ->make_request. By the time that
354 * and so is visible to ->quiesce(), so we don't need the refcount any more.
362 if (mddev->suspend_lo >= mddev->suspend_hi)
364 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
366 if (bio_end_sector(bio) < mddev->suspend_lo)
377 if (bio->bi_opf & REQ_NOWAIT) {
382 prepare_to_wait(&mddev->sb_wait, &__wait,
388 finish_wait(&mddev->sb_wait, &__wait);
390 if (!percpu_ref_tryget_live(&mddev->active_io))
393 if (!mddev->pers->make_request(mddev, bio)) {
394 percpu_ref_put(&mddev->active_io);
398 percpu_ref_put(&mddev->active_io);
405 struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
407 if (mddev == NULL || mddev->pers == NULL) {
412 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
421 if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) {
423 bio->bi_status = BLK_STS_IOERR;
429 bio->bi_opf &= ~REQ_NOMERGE;
442 struct md_thread *thread = rcu_dereference_protected(mddev->thread,
443 lockdep_is_held(&mddev->reconfig_mutex));
445 WARN_ON_ONCE(thread && current == thread->tsk);
446 if (mddev->suspended++)
448 wake_up(&mddev->sb_wait);
449 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
450 percpu_ref_kill(&mddev->active_io);
452 if (mddev->pers && mddev->pers->prepare_suspend)
453 mddev->pers->prepare_suspend(mddev);
455 wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
456 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
457 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
460 mddev->noio_flag = memalloc_noio_save();
466 lockdep_assert_held(&mddev->reconfig_mutex);
467 if (--mddev->suspended)
471 memalloc_noio_restore(mddev->noio_flag);
473 percpu_ref_resurrect(&mddev->active_io);
474 wake_up(&mddev->sb_wait);
476 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
477 md_wakeup_thread(mddev->thread);
478 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
488 struct md_rdev *rdev = bio->bi_private;
489 struct mddev *mddev = rdev->mddev;
495 if (atomic_dec_and_test(&mddev->flush_pending))
496 /* The pre-request flush has finished */
497 queue_work(md_wq, &mddev->flush_work);
507 mddev->start_flush = ktime_get_boottime();
508 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
509 atomic_set(&mddev->flush_pending, 1);
512 if (rdev->raid_disk >= 0 &&
513 !test_bit(Faulty, &rdev->flags)) {
516 atomic_inc(&rdev->nr_pending);
518 bi = bio_alloc_bioset(rdev->bdev, 0,
520 GFP_NOIO, &mddev->bio_set);
521 bi->bi_end_io = md_end_flush;
522 bi->bi_private = rdev;
523 atomic_inc(&mddev->flush_pending);
528 if (atomic_dec_and_test(&mddev->flush_pending))
529 queue_work(md_wq, &mddev->flush_work);
535 struct bio *bio = mddev->flush_bio;
543 spin_lock_irq(&mddev->lock);
544 mddev->prev_flush_start = mddev->start_flush;
545 mddev->flush_bio = NULL;
546 spin_unlock_irq(&mddev->lock);
547 wake_up(&mddev->sb_wait);
549 if (bio->bi_iter.bi_size == 0) {
550 /* an empty barrier - all done */
553 bio->bi_opf &= ~REQ_PREFLUSH;
557 * returns error in raid5_make_request() by dm-raid.
562 if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
567 percpu_ref_put(&mddev->active_io);
579 spin_lock_irq(&mddev->lock);
583 wait_event_lock_irq(mddev->sb_wait,
584 !mddev->flush_bio ||
585 ktime_before(req_start, mddev->prev_flush_start),
586 mddev->lock);
588 if (ktime_after(req_start, mddev->prev_flush_start)) {
589 WARN_ON(mddev->flush_bio);
600 WARN_ON(percpu_ref_is_zero(&mddev->active_io));
601 percpu_ref_get(&mddev->active_io);
602 mddev->flush_bio = bio;
605 spin_unlock_irq(&mddev->lock);
608 INIT_WORK(&mddev->flush_work, submit_flushes);
609 queue_work(md_wq, &mddev->flush_work);
612 if (bio->bi_iter.bi_size == 0)
613 /* an empty barrier - all done */
616 bio->bi_opf &= ~REQ_PREFLUSH;
628 if (test_bit(MD_DELETED, &mddev->flags))
630 atomic_inc(&mddev->active);
638 if (mddev->raid_disks || !list_empty(&mddev->disks) ||
639 mddev->ctime || mddev->hold_active)
643 set_bit(MD_DELETED, &mddev->flags);
649 queue_work(md_misc_wq, &mddev->del_work);
654 if (atomic_dec_and_test(&mddev->active))
660 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
672 mutex_init(&mddev->open_mutex);
673 mutex_init(&mddev->reconfig_mutex);
674 mutex_init(&mddev->sync_mutex);
675 mutex_init(&mddev->bitmap_info.mutex);
676 INIT_LIST_HEAD(&mddev->disks);
677 INIT_LIST_HEAD(&mddev->all_mddevs);
678 INIT_LIST_HEAD(&mddev->deleting);
679 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
680 atomic_set(&mddev->active, 1);
681 atomic_set(&mddev->openers, 0);
682 atomic_set(&mddev->sync_seq, 0);
683 spin_lock_init(&mddev->lock);
684 atomic_set(&mddev->flush_pending, 0);
685 init_waitqueue_head(&mddev->sb_wait);
686 init_waitqueue_head(&mddev->recovery_wait);
687 mddev->reshape_position = MaxSector;
688 mddev->reshape_backwards = 0;
689 mddev->last_sync_action = "none";
690 mddev->resync_min = 0;
691 mddev->resync_max = MaxSector;
692 mddev->level = LEVEL_NONE;
694 INIT_WORK(&mddev->sync_work, md_start_sync);
695 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
704 if (mddev->unit == unit)
737 unit &= ~((1 << MdpMinorShift) - 1);
741 return ERR_PTR(-ENOMEM);
746 error = -EEXIST;
749 new->unit = unit;
751 new->md_minor = MINOR(unit);
753 new->md_minor = MINOR(unit) >> MdpMinorShift;
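For the MdpMinorShift arithmetic at 737 and 753 above: MdpMinorShift is 6 in md.c, so each partitionable (mdp) array owns a block of 2^6 = 64 minor numbers. A sketch, assuming that value:

/* unit &= ~((1 << 6) - 1);    rounds a minor down to the array's base
 *                             minor by clearing the partition bits
 * MINOR(unit) >> 6;           yields the array's index among mdp devices */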
754 new->hold_active = UNTIL_IOCTL;
756 error = -ENODEV;
757 new->unit = mddev_alloc_unit();
758 if (!new->unit)
760 new->md_minor = MINOR(new->unit);
761 new->hold_active = UNTIL_STOP;
764 list_add(&new->all_mddevs, &all_mddevs);
776 list_del(&mddev->all_mddevs);
790 if (!list_empty(&mddev->deleting))
791 list_splice_init(&mddev->deleting, &delete);
793 if (mddev->to_remove) {
799 * and anything else which might set ->to_remove or may
801 * -EBUSY if sysfs_active is still set.
806 const struct attribute_group *to_remove = mddev->to_remove;
807 mddev->to_remove = NULL;
808 mddev->sysfs_active = 1;
809 mutex_unlock(&mddev->reconfig_mutex);
811 if (mddev->kobj.sd) {
813 sysfs_remove_group(&mddev->kobj, to_remove);
814 if (mddev->pers == NULL ||
815 mddev->pers->sync_request == NULL) {
816 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
817 if (mddev->sysfs_action)
818 sysfs_put(mddev->sysfs_action);
819 if (mddev->sysfs_completed)
820 sysfs_put(mddev->sysfs_completed);
821 if (mddev->sysfs_degraded)
822 sysfs_put(mddev->sysfs_degraded);
823 mddev->sysfs_action = NULL;
824 mddev->sysfs_completed = NULL;
825 mddev->sysfs_degraded = NULL;
828 mddev->sysfs_active = 0;
830 mutex_unlock(&mddev->reconfig_mutex);
832 md_wakeup_thread(mddev->thread);
833 wake_up(&mddev->sb_wait);
836 list_del_init(&rdev->same_set);
837 kobject_del(&rdev->kobj);
848 if (rdev->desc_nr == nr)
860 if (rdev->bdev->bd_dev == dev)
871 if (rdev->bdev->bd_dev == dev)
882 if (level != LEVEL_NONE && pers->level == level)
884 if (strcmp(pers->name, clevel)==0)
893 return MD_NEW_SIZE_SECTORS(bdev_nr_sectors(rdev->bdev));
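MD_NEW_SIZE_SECTORS() above picks the v0.90 superblock location; a hedged sketch of the macro as defined in the md_p.h UAPI header (64 KiB reserved at the end of the device, 64 KiB aligned):

/* MD_RESERVED_BYTES == 64 * 1024, MD_RESERVED_SECTORS == 128:
 *
 *   #define MD_NEW_SIZE_SECTORS(x) \
 *           ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
 *
 * i.e. round the device size down to a 64 KiB boundary, then step back
 * one reserved block so the superblock occupies the device's tail. */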
898 rdev->sb_page = alloc_page(GFP_KERNEL);
899 if (!rdev->sb_page)
900 return -ENOMEM;
906 if (rdev->sb_page) {
907 put_page(rdev->sb_page);
908 rdev->sb_loaded = 0;
909 rdev->sb_page = NULL;
910 rdev->sb_start = 0;
911 rdev->sectors = 0;
913 if (rdev->bb_page) {
914 put_page(rdev->bb_page);
915 rdev->bb_page = NULL;
917 badblocks_exit(&rdev->badblocks);
923 struct md_rdev *rdev = bio->bi_private;
924 struct mddev *mddev = rdev->mddev;
926 if (bio->bi_status) {
928 blk_status_to_errno(bio->bi_status));
930 if (!test_bit(Faulty, &rdev->flags)
931 && (bio->bi_opf & MD_FAILFAST)) {
932 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
933 set_bit(LastDev, &rdev->flags);
936 clear_bit(LastDev, &rdev->flags);
942 if (atomic_dec_and_test(&mddev->pending_writes))
943 wake_up(&mddev->sb_wait);
950 * Increment mddev->pending_writes before returning
960 if (test_bit(Faulty, &rdev->flags))
963 bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
967 GFP_NOIO, &mddev->sync_set);
969 atomic_inc(&rdev->nr_pending);
971 bio->bi_iter.bi_sector = sector;
973 bio->bi_private = rdev;
974 bio->bi_end_io = super_written;
976 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
977 test_bit(FailFast, &rdev->flags) &&
978 !test_bit(LastDev, &rdev->flags))
979 bio->bi_opf |= MD_FAILFAST;
981 atomic_inc(&mddev->pending_writes);
988 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
989 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
990 return -EAGAIN;
1000 if (metadata_op && rdev->meta_bdev)
1001 bio_init(&bio, rdev->meta_bdev, &bvec, 1, opf);
1003 bio_init(&bio, rdev->bdev, &bvec, 1, opf);
1006 bio.bi_iter.bi_sector = sector + rdev->sb_start;
1007 else if (rdev->mddev->reshape_position != MaxSector &&
1008 (rdev->mddev->reshape_backwards ==
1009 (sector >= rdev->mddev->reshape_position)))
1010 bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
1012 bio.bi_iter.bi_sector = sector + rdev->data_offset;
1023 if (rdev->sb_loaded)
1026 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true))
1028 rdev->sb_loaded = 1;
1033 rdev->bdev);
1034 return -EINVAL;
1039 return sb1->set_uuid0 == sb2->set_uuid0 &&
1040 sb1->set_uuid1 == sb2->set_uuid1 &&
1041 sb1->set_uuid2 == sb2->set_uuid2 &&
1042 sb1->set_uuid3 == sb2->set_uuid3;
1064 tmp1->nr_disks = 0;
1065 tmp2->nr_disks = 0;
1087 disk_csum = sb->sb_csum;
1088 sb->sb_csum = 0;
1103 sb->sb_csum = md_csum_fold(disk_csum);
1105 sb->sb_csum = disk_csum;
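The folding helper used above exists because csum_partial() behaves differently across architectures (some fold to 16 bits, some return a full 32-bit sum); a hedged reconstruction:

/* Fold a 32-bit checksum down to 16 bits; folding twice lets the
 * second pass absorb any carry produced by the first. */
static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}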
1115 * We rely on user-space to write the initial superblock, and support
1122 * 0 - dev has a superblock that is compatible with refdev
1123 * 1 - dev has a superblock that is compatible and newer than refdev
1125 * -EINVAL superblock incompatible or invalid
1126 * -othererror e.g. -EIO
1130 * The first time, mddev->raid_disks will be 0, and data from
1132 * is new enough. Return 0 or -EINVAL
1161 * support bitmaps. It prints an error message and returns non-zero if mddev
1167 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1170 mdname(mddev), mddev->pers->name);
1190 rdev->sb_start = calc_dev_sboffset(rdev);
1196 ret = -EINVAL;
1198 sb = page_address(rdev->sb_page);
1200 if (sb->md_magic != MD_SB_MAGIC) {
1202 rdev->bdev);
1206 if (sb->major_version != 0 ||
1207 sb->minor_version < 90 ||
1208 sb->minor_version > 91) {
1210 sb->major_version, sb->minor_version, rdev->bdev);
1214 if (sb->raid_disks <= 0)
1217 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1218 pr_warn("md: invalid superblock checksum on %pg\n", rdev->bdev);
1222 rdev->preferred_minor = sb->md_minor;
1223 rdev->data_offset = 0;
1224 rdev->new_data_offset = 0;
1225 rdev->sb_size = MD_SB_BYTES;
1226 rdev->badblocks.shift = -1;
1228 if (sb->level == LEVEL_MULTIPATH)
1229 rdev->desc_nr = -1;
1231 rdev->desc_nr = sb->this_disk.number;
1234 if (sb->level == LEVEL_MULTIPATH ||
1235 (rdev->desc_nr >= 0 &&
1236 rdev->desc_nr < MD_SB_DISKS &&
1237 sb->disks[rdev->desc_nr].state &
1248 mdp_super_t *refsb = page_address(refdev->sb_page);
1251 rdev->bdev, refdev->bdev);
1256 rdev->bdev, refdev->bdev);
1267 rdev->sectors = rdev->sb_start;
1272 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1273 rdev->sectors = (sector_t)(2ULL << 32) - 2;
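A note on the clamp above: a v0.90 superblock records the component size in KiB in a 32-bit field, so the largest representable size is 2^32 KiB = 4 TiB, which equals 2ULL << 32 = 2^33 sectors of 512 bytes; larger members are truncated to (2^33 - 2) sectors for RAID levels that store per-device data.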
1275 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1277 ret = -EINVAL;
1290 mdp_super_t *sb = page_address(rdev->sb_page);
1293 rdev->raid_disk = -1;
1294 clear_bit(Faulty, &rdev->flags);
1295 clear_bit(In_sync, &rdev->flags);
1296 clear_bit(Bitmap_sync, &rdev->flags);
1297 clear_bit(WriteMostly, &rdev->flags);
1299 if (mddev->raid_disks == 0) {
1300 mddev->major_version = 0;
1301 mddev->minor_version = sb->minor_version;
1302 mddev->patch_version = sb->patch_version;
1303 mddev->external = 0;
1304 mddev->chunk_sectors = sb->chunk_size >> 9;
1305 mddev->ctime = sb->ctime;
1306 mddev->utime = sb->utime;
1307 mddev->level = sb->level;
1308 mddev->clevel[0] = 0;
1309 mddev->layout = sb->layout;
1310 mddev->raid_disks = sb->raid_disks;
1311 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1312 mddev->events = ev1;
1313 mddev->bitmap_info.offset = 0;
1314 mddev->bitmap_info.space = 0;
1316 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1317 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1318 mddev->reshape_backwards = 0;
1320 if (mddev->minor_version >= 91) {
1321 mddev->reshape_position = sb->reshape_position;
1322 mddev->delta_disks = sb->delta_disks;
1323 mddev->new_level = sb->new_level;
1324 mddev->new_layout = sb->new_layout;
1325 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1326 if (mddev->delta_disks < 0)
1327 mddev->reshape_backwards = 1;
1329 mddev->reshape_position = MaxSector;
1330 mddev->delta_disks = 0;
1331 mddev->new_level = mddev->level;
1332 mddev->new_layout = mddev->layout;
1333 mddev->new_chunk_sectors = mddev->chunk_sectors;
1335 if (mddev->level == 0)
1336 mddev->layout = -1;
1338 if (sb->state & (1<<MD_SB_CLEAN))
1339 mddev->recovery_cp = MaxSector;
1341 if (sb->events_hi == sb->cp_events_hi &&
1342 sb->events_lo == sb->cp_events_lo) {
1343 mddev->recovery_cp = sb->recovery_cp;
1345 mddev->recovery_cp = 0;
1348 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1349 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1350 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1351 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1353 mddev->max_disks = MD_SB_DISKS;
1355 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1356 mddev->bitmap_info.file == NULL) {
1357 mddev->bitmap_info.offset =
1358 mddev->bitmap_info.default_offset;
1359 mddev->bitmap_info.space =
1360 mddev->bitmap_info.default_space;
1363 } else if (mddev->pers == NULL) {
1367 if (sb->disks[rdev->desc_nr].state & (
1369 if (ev1 < mddev->events)
1370 return -EINVAL;
1371 } else if (mddev->bitmap) {
1375 if (ev1 < mddev->bitmap->events_cleared)
1377 if (ev1 < mddev->events)
1378 set_bit(Bitmap_sync, &rdev->flags);
1380 if (ev1 < mddev->events)
1381 /* just a hot-add of a new device, leave raid_disk at -1 */
1385 if (mddev->level != LEVEL_MULTIPATH) {
1386 desc = sb->disks + rdev->desc_nr;
1388 if (desc->state & (1<<MD_DISK_FAULTY))
1389 set_bit(Faulty, &rdev->flags);
1390 else if (desc->state & (1<<MD_DISK_SYNC) /* &&
1391 desc->raid_disk < mddev->raid_disks */) {
1392 set_bit(In_sync, &rdev->flags);
1393 rdev->raid_disk = desc->raid_disk;
1394 rdev->saved_raid_disk = desc->raid_disk;
1395 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1399 if (mddev->minor_version >= 91) {
1400 rdev->recovery_offset = 0;
1401 rdev->raid_disk = desc->raid_disk;
1404 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1405 set_bit(WriteMostly, &rdev->flags);
1406 if (desc->state & (1<<MD_DISK_FAILFAST))
1407 set_bit(FailFast, &rdev->flags);
1409 set_bit(In_sync, &rdev->flags);
1420 int next_spare = mddev->raid_disks;
1422 /* make rdev->sb match mddev data..
1435 rdev->sb_size = MD_SB_BYTES;
1437 sb = page_address(rdev->sb_page);
1441 sb->md_magic = MD_SB_MAGIC;
1442 sb->major_version = mddev->major_version;
1443 sb->patch_version = mddev->patch_version;
1444 sb->gvalid_words = 0; /* ignored */
1445 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1446 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1447 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1448 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1450 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1451 sb->level = mddev->level;
1452 sb->size = mddev->dev_sectors / 2;
1453 sb->raid_disks = mddev->raid_disks;
1454 sb->md_minor = mddev->md_minor;
1455 sb->not_persistent = 0;
1456 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1457 sb->state = 0;
1458 sb->events_hi = (mddev->events>>32);
1459 sb->events_lo = (u32)mddev->events;
1461 if (mddev->reshape_position == MaxSector)
1462 sb->minor_version = 90;
1464 sb->minor_version = 91;
1465 sb->reshape_position = mddev->reshape_position;
1466 sb->new_level = mddev->new_level;
1467 sb->delta_disks = mddev->delta_disks;
1468 sb->new_layout = mddev->new_layout;
1469 sb->new_chunk = mddev->new_chunk_sectors << 9;
1471 mddev->minor_version = sb->minor_version;
1472 if (mddev->in_sync)
1474 sb->recovery_cp = mddev->recovery_cp;
1475 sb->cp_events_hi = (mddev->events>>32);
1476 sb->cp_events_lo = (u32)mddev->events;
1477 if (mddev->recovery_cp == MaxSector)
1478 sb->state = (1<< MD_SB_CLEAN);
1480 sb->recovery_cp = 0;
1482 sb->layout = mddev->layout;
1483 sb->chunk_size = mddev->chunk_sectors << 9;
1485 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1486 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1488 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1492 int is_active = test_bit(In_sync, &rdev2->flags);
1494 if (rdev2->raid_disk >= 0 &&
1495 sb->minor_version >= 91)
1498 * we can piggy-back on that.
1501 if (rdev2->raid_disk < 0 ||
1502 test_bit(Faulty, &rdev2->flags))
1505 desc_nr = rdev2->raid_disk;
1508 rdev2->desc_nr = desc_nr;
1509 d = &sb->disks[rdev2->desc_nr];
1511 d->number = rdev2->desc_nr;
1512 d->major = MAJOR(rdev2->bdev->bd_dev);
1513 d->minor = MINOR(rdev2->bdev->bd_dev);
1515 d->raid_disk = rdev2->raid_disk;
1517 d->raid_disk = rdev2->desc_nr; /* compatibility */
1518 if (test_bit(Faulty, &rdev2->flags))
1519 d->state = (1<<MD_DISK_FAULTY);
1521 d->state = (1<<MD_DISK_ACTIVE);
1522 if (test_bit(In_sync, &rdev2->flags))
1523 d->state |= (1<<MD_DISK_SYNC);
1527 d->state = 0;
1531 if (test_bit(WriteMostly, &rdev2->flags))
1532 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1533 if (test_bit(FailFast, &rdev2->flags))
1534 d->state |= (1<<MD_DISK_FAILFAST);
1537 for (i=0 ; i < mddev->raid_disks ; i++) {
1538 mdp_disk_t *d = &sb->disks[i];
1539 if (d->state == 0 && d->number == 0) {
1540 d->number = i;
1541 d->raid_disk = i;
1542 d->state = (1<<MD_DISK_REMOVED);
1543 d->state |= (1<<MD_DISK_FAULTY);
1547 sb->nr_disks = nr_disks;
1548 sb->active_disks = active;
1549 sb->working_disks = working;
1550 sb->failed_disks = failed;
1551 sb->spare_disks = spare;
1553 sb->this_disk = sb->disks[rdev->desc_nr];
1554 sb->sb_csum = calc_sb_csum(sb);
1563 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1565 if (rdev->mddev->bitmap_info.offset)
1567 rdev->sb_start = calc_dev_sboffset(rdev);
1568 if (!num_sectors || num_sectors > rdev->sb_start)
1569 num_sectors = rdev->sb_start;
1573 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1574 num_sectors = (sector_t)(2ULL << 32) - 2;
1576 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1577 rdev->sb_page);
1578 } while (md_super_wait(rdev->mddev) < 0);
1585 /* non-zero offset changes not possible with v0.90 */
1598 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1601 disk_csum = sb->sb_csum;
1602 sb->sb_csum = 0;
1604 for (; size >= 4; size -= 4)
1611 sb->sb_csum = disk_csum;
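A hedged reconstruction of the v1 checksum routine these fragments come from: the superblock (256 bytes plus two bytes per device role) is summed as little-endian 32-bit words with the csum field zeroed, plus a trailing 16-bit word when max_dev is odd, and the 64-bit accumulator is folded to 32 bits:

static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;	/* bytes to sum */
	__le32 *isuper = (__le32 *)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;		/* the csum field itself is not summed */
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);
	if (size == 2)
		newcsum += le16_to_cpu(*(__le16 *)isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;	/* restore the on-disk value */
	return cpu_to_le32(csum);
}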
1634 sb_start = bdev_nr_sectors(rdev->bdev) - 8 * 2;
1635 sb_start &= ~(sector_t)(4*2-1);
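Units in the two lines above are 512-byte sectors: 8 * 2 = 16 sectors places sb_start 8 KiB before the end of the device, and masking with ~(4*2 - 1) aligns it down to an 8-sector (4 KiB) boundary, so the v1.0 superblock sits at the last 4 KiB-aligned offset at least 8 KiB from the end.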
1644 return -EINVAL;
1646 rdev->sb_start = sb_start;
1654 sb = page_address(rdev->sb_page);
1656 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1657 sb->major_version != cpu_to_le32(1) ||
1658 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1659 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1660 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1661 return -EINVAL;
1663 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1665 rdev->bdev);
1666 return -EINVAL;
1668 if (le64_to_cpu(sb->data_size) < 10) {
1670 rdev->bdev);
1671 return -EINVAL;
1673 if (sb->pad0 ||
1674 sb->pad3[0] ||
1675 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1676 /* Some padding is non-zero, might be a new feature */
1677 return -EINVAL;
1679 rdev->preferred_minor = 0xffff;
1680 rdev->data_offset = le64_to_cpu(sb->data_offset);
1681 rdev->new_data_offset = rdev->data_offset;
1682 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1683 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1684 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1685 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1687 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1688 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1689 if (rdev->sb_size & bmask)
1690 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1693 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1694 return -EINVAL;
1696 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1697 return -EINVAL;
1699 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1700 rdev->desc_nr = -1;
1702 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1704 if (!rdev->bb_page) {
1705 rdev->bb_page = alloc_page(GFP_KERNEL);
1706 if (!rdev->bb_page)
1707 return -ENOMEM;
1709 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1710 rdev->badblocks.count == 0) {
1718 int sectors = le16_to_cpu(sb->bblog_size);
1720 return -EINVAL;
1721 offset = le32_to_cpu(sb->bblog_offset);
1723 return -EINVAL;
1726 rdev->bb_page, REQ_OP_READ, true))
1727 return -EIO;
1728 bbp = (__le64 *)page_address(rdev->bb_page);
1729 rdev->badblocks.shift = sb->bblog_shift;
1730 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1734 sector <<= sb->bblog_shift;
1735 count <<= sb->bblog_shift;
1738 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1739 return -EINVAL;
1741 } else if (sb->bblog_offset != 0)
1742 rdev->badblocks.shift = 0;
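For orientation in the loop above: each log sector holds 512 / 8 = 64 little-endian u64 entries, hence the sectors << (9-3) bound, and each entry packs a length in its low 10 bits and a start sector in the upper bits. A hedged sketch of the decode step that this listing elides:

/* Assumed on-disk encoding of one bad-block log entry, as unpacked in
 * super_1_load() just before the lines shown: */
u64 bb = le64_to_cpu(*bbp);
int count = bb & 0x3ff;		/* low 10 bits: run length */
u64 sector = bb >> 10;		/* remaining bits: start sector */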
1744 if ((le32_to_cpu(sb->feature_map) &
1746 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1747 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1748 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1751 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1752 sb->level != 0)
1753 return -EINVAL;
1756 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1757 (rdev->desc_nr >= 0 &&
1758 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1759 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1760 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1770 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1772 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1773 sb->level != refsb->level ||
1774 sb->layout != refsb->layout ||
1775 sb->chunksize != refsb->chunksize) {
1777 rdev->bdev,
1778 refdev->bdev);
1779 return -EINVAL;
1781 ev1 = le64_to_cpu(sb->events);
1782 ev2 = le64_to_cpu(refsb->events);
1790 sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset;
1792 sectors = rdev->sb_start;
1793 if (sectors < le64_to_cpu(sb->data_size))
1794 return -EINVAL;
1795 rdev->sectors = le64_to_cpu(sb->data_size);
1801 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1802 __u64 ev1 = le64_to_cpu(sb->events);
1804 rdev->raid_disk = -1;
1805 clear_bit(Faulty, &rdev->flags);
1806 clear_bit(In_sync, &rdev->flags);
1807 clear_bit(Bitmap_sync, &rdev->flags);
1808 clear_bit(WriteMostly, &rdev->flags);
1810 if (mddev->raid_disks == 0) {
1811 mddev->major_version = 1;
1812 mddev->patch_version = 0;
1813 mddev->external = 0;
1814 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1815 mddev->ctime = le64_to_cpu(sb->ctime);
1816 mddev->utime = le64_to_cpu(sb->utime);
1817 mddev->level = le32_to_cpu(sb->level);
1818 mddev->clevel[0] = 0;
1819 mddev->layout = le32_to_cpu(sb->layout);
1820 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1821 mddev->dev_sectors = le64_to_cpu(sb->size);
1822 mddev->events = ev1;
1823 mddev->bitmap_info.offset = 0;
1824 mddev->bitmap_info.space = 0;
1826 * using 3K - total of 4K
1828 mddev->bitmap_info.default_offset = 1024 >> 9;
1829 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1830 mddev->reshape_backwards = 0;
1832 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1833 memcpy(mddev->uuid, sb->set_uuid, 16);
1835 mddev->max_disks = (4096-256)/2;
1837 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1838 mddev->bitmap_info.file == NULL) {
1839 mddev->bitmap_info.offset =
1840 (__s32)le32_to_cpu(sb->bitmap_offset);
1846 if (mddev->minor_version > 0)
1847 mddev->bitmap_info.space = 0;
1848 else if (mddev->bitmap_info.offset > 0)
1849 mddev->bitmap_info.space =
1850 8 - mddev->bitmap_info.offset;
1852 mddev->bitmap_info.space =
1853 -mddev->bitmap_info.offset;
1856 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1857 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1858 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1859 mddev->new_level = le32_to_cpu(sb->new_level);
1860 mddev->new_layout = le32_to_cpu(sb->new_layout);
1861 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1862 if (mddev->delta_disks < 0 ||
1863 (mddev->delta_disks == 0 &&
1864 (le32_to_cpu(sb->feature_map)
1866 mddev->reshape_backwards = 1;
1868 mddev->reshape_position = MaxSector;
1869 mddev->delta_disks = 0;
1870 mddev->new_level = mddev->level;
1871 mddev->new_layout = mddev->layout;
1872 mddev->new_chunk_sectors = mddev->chunk_sectors;
1875 if (mddev->level == 0 &&
1876 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1877 mddev->layout = -1;
1879 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1880 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1882 if (le32_to_cpu(sb->feature_map) &
1884 if (le32_to_cpu(sb->feature_map) &
1886 return -EINVAL;
1887 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1888 (le32_to_cpu(sb->feature_map) &
1890 return -EINVAL;
1891 set_bit(MD_HAS_PPL, &mddev->flags);
1893 } else if (mddev->pers == NULL) {
1899 if (rdev->desc_nr >= 0 &&
1900 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1901 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1902 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1903 if (ev1 + 1 < mddev->events)
1904 return -EINVAL;
1905 } else if (mddev->bitmap) {
1909 if (ev1 < mddev->bitmap->events_cleared)
1911 if (ev1 < mddev->events)
1912 set_bit(Bitmap_sync, &rdev->flags);
1914 if (ev1 < mddev->events)
1915 /* just a hot-add of a new device, leave raid_disk at -1 */
1918 if (mddev->level != LEVEL_MULTIPATH) {
1920 if (rdev->desc_nr < 0 ||
1921 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1923 rdev->desc_nr = -1;
1924 } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
1939 struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
1940 u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
1942 if (rdev->desc_nr >= freshest_max_dev) {
1944 pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
1945 mdname(mddev), rdev->bdev, rdev->desc_nr,
1946 freshest->bdev, freshest_max_dev);
1947 return -EUCLEAN;
1950 role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
1952 mdname(mddev), rdev->bdev, role, role, freshest->bdev);
1954 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1960 set_bit(Faulty, &rdev->flags);
1963 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1966 return -EINVAL;
1968 set_bit(Journal, &rdev->flags);
1969 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1970 rdev->raid_disk = 0;
1973 rdev->saved_raid_disk = role;
1974 if ((le32_to_cpu(sb->feature_map) &
1976 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1977 if (!(le32_to_cpu(sb->feature_map) &
1979 rdev->saved_raid_disk = -1;
1986 &mddev->recovery))
1987 set_bit(In_sync, &rdev->flags);
1989 rdev->raid_disk = role;
1992 if (sb->devflags & WriteMostly1)
1993 set_bit(WriteMostly, &rdev->flags);
1994 if (sb->devflags & FailFast1)
1995 set_bit(FailFast, &rdev->flags);
1996 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1997 set_bit(Replacement, &rdev->flags);
1999 set_bit(In_sync, &rdev->flags);
2009 /* make rdev->sb match mddev and rdev data. */
2011 sb = page_address(rdev->sb_page);
2013 sb->feature_map = 0;
2014 sb->pad0 = 0;
2015 sb->recovery_offset = cpu_to_le64(0);
2016 memset(sb->pad3, 0, sizeof(sb->pad3));
2018 sb->utime = cpu_to_le64((__u64)mddev->utime);
2019 sb->events = cpu_to_le64(mddev->events);
2020 if (mddev->in_sync)
2021 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2022 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2023 sb->resync_offset = cpu_to_le64(MaxSector);
2025 sb->resync_offset = cpu_to_le64(0);
2027 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2029 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2030 sb->size = cpu_to_le64(mddev->dev_sectors);
2031 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2032 sb->level = cpu_to_le32(mddev->level);
2033 sb->layout = cpu_to_le32(mddev->layout);
2034 if (test_bit(FailFast, &rdev->flags))
2035 sb->devflags |= FailFast1;
2037 sb->devflags &= ~FailFast1;
2039 if (test_bit(WriteMostly, &rdev->flags))
2040 sb->devflags |= WriteMostly1;
2042 sb->devflags &= ~WriteMostly1;
2043 sb->data_offset = cpu_to_le64(rdev->data_offset);
2044 sb->data_size = cpu_to_le64(rdev->sectors);
2046 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2047 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2048 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2051 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2052 !test_bit(In_sync, &rdev->flags)) {
2053 sb->feature_map |=
2055 sb->recovery_offset =
2056 cpu_to_le64(rdev->recovery_offset);
2057 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2058 sb->feature_map |=
2062 if (test_bit(Journal, &rdev->flags))
2063 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2064 if (test_bit(Replacement, &rdev->flags))
2065 sb->feature_map |=
2068 if (mddev->reshape_position != MaxSector) {
2069 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2070 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2071 sb->new_layout = cpu_to_le32(mddev->new_layout);
2072 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2073 sb->new_level = cpu_to_le32(mddev->new_level);
2074 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2075 if (mddev->delta_disks == 0 &&
2076 mddev->reshape_backwards)
2077 sb->feature_map
2079 if (rdev->new_data_offset != rdev->data_offset) {
2080 sb->feature_map
2082 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2083 - rdev->data_offset));
2088 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2090 if (rdev->badblocks.count == 0)
2092 else if (sb->bblog_offset == 0)
2096 struct badblocks *bb = &rdev->badblocks;
2097 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2098 u64 *p = bb->page;
2099 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2100 if (bb->changed) {
2104 seq = read_seqbegin(&bb->lock);
2108 for (i = 0 ; i < bb->count ; i++) {
2114 bb->changed = 0;
2115 if (read_seqretry(&bb->lock, seq))
2118 bb->sector = (rdev->sb_start +
2119 (int)le32_to_cpu(sb->bblog_offset));
2120 bb->size = le16_to_cpu(sb->bblog_size);
2126 if (rdev2->desc_nr+1 > max_dev)
2127 max_dev = rdev2->desc_nr+1;
2129 if (max_dev > le32_to_cpu(sb->max_dev)) {
2131 sb->max_dev = cpu_to_le32(max_dev);
2132 rdev->sb_size = max_dev * 2 + 256;
2133 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2134 if (rdev->sb_size & bmask)
2135 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2137 max_dev = le32_to_cpu(sb->max_dev);
2140 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2142 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2143 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2145 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2146 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2147 sb->feature_map |=
2150 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2151 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2152 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2156 i = rdev2->desc_nr;
2157 if (test_bit(Faulty, &rdev2->flags))
2158 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2159 else if (test_bit(In_sync, &rdev2->flags))
2160 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2161 else if (test_bit(Journal, &rdev2->flags))
2162 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2163 else if (rdev2->raid_disk >= 0)
2164 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2166 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2169 sb->sb_csum = calc_sb_1_csum(sb);
2181 else if (dev_size - 64*2 >= 200*1024*1024*2)
2183 else if (dev_size - 4*2 > 8*1024*1024*2)
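The two size tests above come from the helper that reserves bitmap room ahead of a v1.0 superblock; a hedged reconstruction, with every constant in 512-byte sectors (64*2 = 64 KiB, 200*1024*1024*2 = 200 GiB, and so on):

static sector_t super_1_choose_bm_space(sector_t dev_size)
{
	sector_t bm_space;

	/* reserve 128 KiB of bitmap space above ~200 GiB,
	 * 64 KiB above ~8 GiB, else a token 4 KiB */
	if (dev_size < 64*2)
		bm_space = 0;
	else if (dev_size - 64*2 >= 200*1024*1024*2)
		bm_space = 128*2;
	else if (dev_size - 4*2 > 8*1024*1024*2)
		bm_space = 64*2;
	else
		bm_space = 4*2;
	return bm_space;
}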
2195 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2197 if (rdev->data_offset != rdev->new_data_offset)
2199 if (rdev->sb_start < rdev->data_offset) {
2201 max_sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset;
2204 } else if (rdev->mddev->bitmap_info.offset) {
2210 sector_t dev_size = bdev_nr_sectors(rdev->bdev);
2213 sb_start = dev_size - 8*2;
2214 sb_start &= ~(sector_t)(4*2 - 1);
2221 max_sectors = sb_start - bm_space - 4*2;
2225 rdev->sb_start = sb_start;
2227 sb = page_address(rdev->sb_page);
2228 sb->data_size = cpu_to_le64(num_sectors);
2229 sb->super_offset = cpu_to_le64(rdev->sb_start);
2230 sb->sb_csum = calc_sb_1_csum(sb);
2232 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2233 rdev->sb_page);
2234 } while (md_super_wait(rdev->mddev) < 0);
2245 if (new_offset >= rdev->data_offset)
2250 if (rdev->mddev->minor_version == 0)
2257 * beyond write-intent bitmap
2259 if (rdev->sb_start + (32+4)*2 > new_offset)
2261 bitmap = rdev->mddev->bitmap;
2262 if (bitmap && !rdev->mddev->bitmap_info.file &&
2263 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2264 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2266 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2283 .name = "md-1",
2295 if (mddev->sync_super) {
2296 mddev->sync_super(mddev, rdev);
2300 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2302 super_types[mddev->major_version].sync_super(mddev, rdev);
2311 if (test_bit(Faulty, &rdev->flags) ||
2312 test_bit(Journal, &rdev->flags) ||
2313 rdev->raid_disk == -1)
2316 if (test_bit(Faulty, &rdev2->flags) ||
2317 test_bit(Journal, &rdev2->flags) ||
2318 rdev2->raid_disk == -1)
2320 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2343 if (list_empty(&mddev->disks))
2345 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2348 /* skip spares and non-functional disks */
2349 if (test_bit(Faulty, &rdev->flags))
2351 if (rdev->raid_disk < 0)
2359 if (blk_integrity_compare(reference->bdev->bd_disk,
2360 rdev->bdev->bd_disk) < 0)
2361 return -EINVAL;
2363 if (!reference || !bdev_get_integrity(reference->bdev))
2369 blk_integrity_register(mddev->gendisk,
2370 bdev_get_integrity(reference->bdev));
2373 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
2374 (mddev->level != 1 && mddev->level != 10 &&
2375 bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
2378 * because the function is called by md_run() -> pers->run(),
2379 * md_run calls bioset_exit -> bioset_integrity_free in case
2384 return -EINVAL;
2398 if (!mddev->gendisk)
2401 bi_mddev = blk_get_integrity(mddev->gendisk);
2406 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2408 mdname(mddev), rdev->bdev);
2409 return -ENXIO;
2418 return bdev_read_only(rdev->bdev) ||
2419 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2428 if (find_rdev(mddev, rdev->bdev->bd_dev))
2429 return -EEXIST;
2431 if (rdev_read_only(rdev) && mddev->pers)
2432 return -EROFS;
2434 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2435 if (!test_bit(Journal, &rdev->flags) &&
2436 rdev->sectors &&
2437 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2438 if (mddev->pers) {
2440 * If mddev->level <= 0, then we don't care
2443 if (mddev->level > 0)
2444 return -ENOSPC;
2446 mddev->dev_sectors = rdev->sectors;
2449 /* Verify rdev->desc_nr is unique.
2450 * If it is -1, assign a free number, else
2454 if (rdev->desc_nr < 0) {
2456 if (mddev->pers)
2457 choice = mddev->raid_disks;
2460 rdev->desc_nr = choice;
2462 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2464 return -EBUSY;
2468 if (!test_bit(Journal, &rdev->flags) &&
2469 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2471 mdname(mddev), mddev->max_disks);
2472 return -EBUSY;
2474 snprintf(b, sizeof(b), "%pg", rdev->bdev);
2477 rdev->mddev = mddev;
2480 if (mddev->raid_disks)
2483 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2487 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2488 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2489 rdev->sysfs_unack_badblocks =
2490 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2491 rdev->sysfs_badblocks =
2492 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2494 list_add_rcu(&rdev->same_set, &mddev->disks);
2495 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2498 mddev->recovery_disabled++;
2503 pr_warn("md: failed to register dev-%s for %s\n",
2516 pr_debug("md: export_rdev(%pg)\n", rdev->bdev);
2519 if (test_bit(AutoDetected, &rdev->flags))
2520 md_autodetect_dev(rdev->bdev->bd_dev);
2522 blkdev_put(rdev->bdev,
2523 test_bit(Holder, &rdev->flags) ? rdev : &claim_rdev);
2524 rdev->bdev = NULL;
2525 kobject_put(&rdev->kobj);
2530 struct mddev *mddev = rdev->mddev;
2532 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2533 list_del_rcu(&rdev->same_set);
2534 pr_debug("md: unbind<%pg>\n", rdev->bdev);
2535 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2536 rdev->mddev = NULL;
2537 sysfs_remove_link(&rdev->kobj, "block");
2538 sysfs_put(rdev->sysfs_state);
2539 sysfs_put(rdev->sysfs_unack_badblocks);
2540 sysfs_put(rdev->sysfs_badblocks);
2541 rdev->sysfs_state = NULL;
2542 rdev->sysfs_unack_badblocks = NULL;
2543 rdev->sysfs_badblocks = NULL;
2544 rdev->badblocks.count = 0;
2553 list_add(&rdev->same_set, &mddev->deleting);
2560 while (!list_empty(&mddev->disks)) {
2561 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2565 mddev->raid_disks = 0;
2566 mddev->major_version = 0;
2571 lockdep_assert_held(&mddev->lock);
2572 if (!mddev->in_sync) {
2573 mddev->sync_checkers++;
2574 spin_unlock(&mddev->lock);
2575 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2576 spin_lock(&mddev->lock);
2577 if (!mddev->in_sync &&
2578 percpu_ref_is_zero(&mddev->writes_pending)) {
2579 mddev->in_sync = 1;
2581 * Ensure ->in_sync is visible before we clear
2582 * ->sync_checkers.
2585 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2586 sysfs_notify_dirent_safe(mddev->sysfs_state);
2588 if (--mddev->sync_checkers == 0)
2589 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2591 if (mddev->safemode == 1)
2592 mddev->safemode = 0;
2593 return mddev->in_sync;
2598 /* Update each superblock (in-memory image), but
2606 if (rdev->sb_events == mddev->events ||
2608 rdev->raid_disk < 0 &&
2609 rdev->sb_events+1 == mddev->events)) {
2611 rdev->sb_loaded = 2;
2614 rdev->sb_loaded = 1;
2627 if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
2636 sb = page_address(rdev->sb_page);
2639 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2641 if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
2642 !test_bit(Faulty, &rdev->flags))
2645 if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
2650 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2651 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2652 (mddev->layout != le32_to_cpu(sb->layout)) ||
2653 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2654 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2666 int ret = -1;
2670 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2676 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2678 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2680 ret = md_cluster_ops->metadata_update_start(mddev);
2684 md_cluster_ops->metadata_update_cancel(mddev);
2685 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2695 * During reshape/resync it might use array-addresses rather
2699 if (rdev->raid_disk >= 0 &&
2700 mddev->delta_disks >= 0 &&
2701 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2702 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2703 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2704 !test_bit(Journal, &rdev->flags) &&
2705 !test_bit(In_sync, &rdev->flags) &&
2706 mddev->curr_resync_completed > rdev->recovery_offset)
2707 rdev->recovery_offset = mddev->curr_resync_completed;
2710 if (!mddev->persistent) {
2711 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2712 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2713 if (!mddev->external) {
2714 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2716 if (rdev->badblocks.changed) {
2717 rdev->badblocks.changed = 0;
2718 ack_all_badblocks(&rdev->badblocks);
2721 clear_bit(Blocked, &rdev->flags);
2722 clear_bit(BlockedBadBlocks, &rdev->flags);
2723 wake_up(&rdev->blocked_wait);
2726 wake_up(&mddev->sb_wait);
2730 spin_lock(&mddev->lock);
2732 mddev->utime = ktime_get_real_seconds();
2734 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2736 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2737 /* just a clean <-> dirty transition, possibly leave spares alone,
2744 if (mddev->degraded)
2748 * might have an event_count that still looks up-to-date,
2749 * so it can be re-added without a resync.
2756 sync_req = mddev->in_sync;
2758 /* If this is just a dirty<->clean transition, and the array is clean
2761 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2762 && mddev->can_decrease_events
2763 && mddev->events != 1) {
2764 mddev->events--;
2765 mddev->can_decrease_events = 0;
2768 mddev->events ++;
2769 mddev->can_decrease_events = nospares;
2773 * This 64-bit counter should never wrap.
2777 WARN_ON(mddev->events == 0);
2780 if (rdev->badblocks.changed)
2782 if (test_bit(Faulty, &rdev->flags))
2783 set_bit(FaultRecorded, &rdev->flags);
2787 spin_unlock(&mddev->lock);
2790 mdname(mddev), mddev->in_sync);
2792 if (mddev->queue)
2793 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2795 md_bitmap_update_sb(mddev->bitmap);
2797 if (rdev->sb_loaded != 1)
2800 if (!test_bit(Faulty, &rdev->flags)) {
2802 rdev->sb_start, rdev->sb_size,
2803 rdev->sb_page);
2805 rdev->bdev,
2806 (unsigned long long)rdev->sb_start);
2807 rdev->sb_events = mddev->events;
2808 if (rdev->badblocks.size) {
2810 rdev->badblocks.sector,
2811 rdev->badblocks.size << 9,
2812 rdev->bb_page);
2813 rdev->badblocks.size = 0;
2818 rdev->bdev);
2820 if (mddev->level == LEVEL_MULTIPATH)
2826 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2829 md_cluster_ops->metadata_update_finish(mddev);
2831 if (mddev->in_sync != sync_req ||
2832 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2836 wake_up(&mddev->sb_wait);
2837 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2838 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2841 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2842 clear_bit(Blocked, &rdev->flags);
2845 ack_all_badblocks(&rdev->badblocks);
2846 clear_bit(BlockedBadBlocks, &rdev->flags);
2847 wake_up(&rdev->blocked_wait);
2854 struct mddev *mddev = rdev->mddev;
2856 bool add_journal = test_bit(Journal, &rdev->flags);
2858 if (!mddev->pers->hot_remove_disk || add_journal) {
2863 super_types[mddev->major_version].
2867 err = mddev->pers->hot_add_disk(mddev, rdev);
2875 sysfs_notify_dirent_safe(rdev->sysfs_state);
2877 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2878 if (mddev->degraded)
2879 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2880 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2882 md_wakeup_thread(mddev->thread);
2917 unsigned long flags = READ_ONCE(rdev->flags);
2921 rdev->badblocks.unacked_exist))
2930 (rdev->badblocks.unacked_exist
2949 len -= strlen(sep);
2958 * faulty - simulates an error
2959 * remove - disconnects the device
2960 * writemostly - sets write_mostly
2961 * -writemostly - clears write_mostly
2962 * blocked - sets the Blocked flag
2963 * -blocked - clears the Blocked flag and possibly simulates an error
2964 * insync - sets In_sync provided the device isn't active
2965 * -insync - clears In_sync for a device with a slot assigned,
2967 * write_error - sets WriteErrorSeen
2968 * -write_error - clears WriteErrorSeen
2969 * {,-}failfast - set/clear FailFast
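The commands listed above are parsed by state_store() with cmd_match() when userspace writes to a member device's sysfs state attribute. A minimal illustration, assuming an array md0 with member sda; the path follows the dev-%s kobject name that bind_rdev_to_array() registers (visible at 2483 further down this listing):

/* Illustration only, equivalent to:
 *   echo faulty > /sys/block/md0/md/dev-sda/state
 */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/block/md0/md/dev-sda/state", O_WRONLY);

	if (fd < 0)
		return 1;
	/* "faulty" routes through md_error() on the chosen rdev */
	if (write(fd, "faulty", 6) != 6) {
		close(fd);
		return 1;
	}
	return close(fd) != 0;
}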
2972 struct mddev *mddev = rdev->mddev;
2973 int err = -EINVAL;
2976 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2977 md_error(rdev->mddev, rdev);
2979 if (test_bit(MD_BROKEN, &rdev->mddev->flags))
2980 err = -EBUSY;
2984 if (rdev->mddev->pers) {
2985 clear_bit(Blocked, &rdev->flags);
2986 remove_and_add_spares(rdev->mddev, rdev);
2988 if (rdev->raid_disk >= 0)
2989 err = -EBUSY;
2993 err = md_cluster_ops->remove_disk(mddev, rdev);
2997 if (mddev->pers) {
2998 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2999 md_wakeup_thread(mddev->thread);
3005 set_bit(WriteMostly, &rdev->flags);
3006 mddev_create_serial_pool(rdev->mddev, rdev, false);
3009 } else if (cmd_match(buf, "-writemostly")) {
3010 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3011 clear_bit(WriteMostly, &rdev->flags);
3015 set_bit(Blocked, &rdev->flags);
3017 } else if (cmd_match(buf, "-blocked")) {
3018 if (!test_bit(Faulty, &rdev->flags) &&
3019 !test_bit(ExternalBbl, &rdev->flags) &&
3020 rdev->badblocks.unacked_exist) {
3024 md_error(rdev->mddev, rdev);
3026 clear_bit(Blocked, &rdev->flags);
3027 clear_bit(BlockedBadBlocks, &rdev->flags);
3028 wake_up(&rdev->blocked_wait);
3029 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3030 md_wakeup_thread(rdev->mddev->thread);
3033 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3034 set_bit(In_sync, &rdev->flags);
3037 set_bit(FailFast, &rdev->flags);
3040 } else if (cmd_match(buf, "-failfast")) {
3041 clear_bit(FailFast, &rdev->flags);
3044 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3045 !test_bit(Journal, &rdev->flags)) {
3046 if (rdev->mddev->pers == NULL) {
3047 clear_bit(In_sync, &rdev->flags);
3048 rdev->saved_raid_disk = rdev->raid_disk;
3049 rdev->raid_disk = -1;
3053 set_bit(WriteErrorSeen, &rdev->flags);
3055 } else if (cmd_match(buf, "-write_error")) {
3056 clear_bit(WriteErrorSeen, &rdev->flags);
3059 /* Any non-spare device that is not a replacement can
3063 if (rdev->raid_disk >= 0 &&
3064 !test_bit(Journal, &rdev->flags) &&
3065 !test_bit(Replacement, &rdev->flags))
3066 set_bit(WantReplacement, &rdev->flags);
3067 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3068 md_wakeup_thread(rdev->mddev->thread);
3070 } else if (cmd_match(buf, "-want_replacement")) {
3075 clear_bit(WantReplacement, &rdev->flags);
3081 if (rdev->mddev->pers)
3082 err = -EBUSY;
3084 set_bit(Replacement, &rdev->flags);
3087 } else if (cmd_match(buf, "-replacement")) {
3089 if (rdev->mddev->pers)
3090 err = -EBUSY;
3092 clear_bit(Replacement, &rdev->flags);
3095 } else if (cmd_match(buf, "re-add")) {
3096 if (!rdev->mddev->pers)
3097 err = -EINVAL;
3098 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3099 rdev->saved_raid_disk >= 0) {
3106 if (!mddev_is_clustered(rdev->mddev) ||
3107 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3108 clear_bit(Faulty, &rdev->flags);
3112 err = -EBUSY;
3113 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3114 set_bit(ExternalBbl, &rdev->flags);
3115 rdev->badblocks.shift = 0;
3117 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3118 clear_bit(ExternalBbl, &rdev->flags);
3124 sysfs_notify_dirent_safe(rdev->sysfs_state);
3133 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3145 atomic_set(&rdev->corrected_errors, n);
3154 if (test_bit(Journal, &rdev->flags))
3156 else if (rdev->raid_disk < 0)
3159 return sprintf(page, "%d\n", rdev->raid_disk);
3168 if (test_bit(Journal, &rdev->flags))
3169 return -EBUSY;
3171 slot = -1;
3178 return -ENOSPC;
3180 if (rdev->mddev->pers && slot == -1) {
3183 * with the personality with ->hot_*_disk.
3188 if (rdev->raid_disk == -1)
3189 return -EEXIST;
3191 if (rdev->mddev->pers->hot_remove_disk == NULL)
3192 return -EINVAL;
3193 clear_bit(Blocked, &rdev->flags);
3194 remove_and_add_spares(rdev->mddev, rdev);
3195 if (rdev->raid_disk >= 0)
3196 return -EBUSY;
3197 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3198 md_wakeup_thread(rdev->mddev->thread);
3199 } else if (rdev->mddev->pers) {
3205 if (rdev->raid_disk != -1)
3206 return -EBUSY;
3208 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3209 return -EBUSY;
3211 if (rdev->mddev->pers->hot_add_disk == NULL)
3212 return -EINVAL;
3214 if (slot >= rdev->mddev->raid_disks &&
3215 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3216 return -ENOSPC;
3218 rdev->raid_disk = slot;
3219 if (test_bit(In_sync, &rdev->flags))
3220 rdev->saved_raid_disk = slot;
3222 rdev->saved_raid_disk = -1;
3223 clear_bit(In_sync, &rdev->flags);
3224 clear_bit(Bitmap_sync, &rdev->flags);
3225 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3227 rdev->raid_disk = -1;
3230 sysfs_notify_dirent_safe(rdev->sysfs_state);
3232 sysfs_link_rdev(rdev->mddev, rdev);
3235 if (slot >= rdev->mddev->raid_disks &&
3236 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3237 return -ENOSPC;
3238 rdev->raid_disk = slot;
3240 clear_bit(Faulty, &rdev->flags);
3241 clear_bit(WriteMostly, &rdev->flags);
3242 set_bit(In_sync, &rdev->flags);
3243 sysfs_notify_dirent_safe(rdev->sysfs_state);
3254 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3262 return -EINVAL;
3263 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3264 return -EBUSY;
3265 if (rdev->sectors && rdev->mddev->external)
3268 return -EBUSY;
3269 rdev->data_offset = offset;
3270 rdev->new_data_offset = offset;
3280 (unsigned long long)rdev->new_data_offset);
3287 struct mddev *mddev = rdev->mddev;
3290 return -EINVAL;
3292 if (mddev->sync_thread ||
3293 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3294 return -EBUSY;
3295 if (new_offset == rdev->data_offset)
3298 else if (new_offset > rdev->data_offset) {
3300 if (new_offset - rdev->data_offset
3301 + mddev->dev_sectors > rdev->sectors)
3302 return -E2BIG;
3309 if (new_offset < rdev->data_offset &&
3310 mddev->reshape_backwards)
3311 return -EINVAL;
3316 if (new_offset > rdev->data_offset &&
3317 !mddev->reshape_backwards)
3318 return -EINVAL;
3320 if (mddev->pers && mddev->persistent &&
3321 !super_types[mddev->major_version]
3323 return -E2BIG;
3324 rdev->new_data_offset = new_offset;
3325 if (new_offset > rdev->data_offset)
3326 mddev->reshape_backwards = 1;
3327 else if (new_offset < rdev->data_offset)
3328 mddev->reshape_backwards = 0;
3338 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3344 if (a->data_offset + a->sectors <= b->data_offset)
3346 if (b->data_offset + b->sectors <= a->data_offset)
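The two comparisons above form the standard half-open interval test for rdev data extents; a hedged sketch of the predicate (the function name here is an assumption):

/* Two extents overlap unless one ends at or before the other's start. */
static bool rdevs_overlap(struct md_rdev *a, struct md_rdev *b)
{
	if (a->data_offset + a->sectors <= b->data_offset)
		return false;	/* a lies entirely below b */
	if (b->data_offset + b->sectors <= a->data_offset)
		return false;	/* b lies entirely below a */
	return true;
}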
3358 if (test_bit(MD_DELETED, &mddev->flags))
3361 if (rdev != rdev2 && rdev->bdev == rdev2->bdev &&
3378 return -EINVAL;
3380 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3381 return -EINVAL; /* sector conversion overflow */
3385 return -EINVAL; /* unsigned long long to sector_t overflow */
3394 struct mddev *my_mddev = rdev->mddev;
3395 sector_t oldsectors = rdev->sectors;
3398 if (test_bit(Journal, &rdev->flags))
3399 return -EBUSY;
3401 return -EINVAL;
3402 if (rdev->data_offset != rdev->new_data_offset)
3403 return -EINVAL; /* too confusing */
3404 if (my_mddev->pers && rdev->raid_disk >= 0) {
3405 if (my_mddev->persistent) {
3406 sectors = super_types[my_mddev->major_version].
3409 return -EBUSY;
3411 sectors = bdev_nr_sectors(rdev->bdev) -
3412 rdev->data_offset;
3413 if (!my_mddev->pers->resize)
3415 return -EINVAL;
3417 if (sectors < my_mddev->dev_sectors)
3418 return -EINVAL; /* component must fit device */
3420 rdev->sectors = sectors;
3427 if (sectors > oldsectors && my_mddev->external &&
3434 rdev->sectors = oldsectors;
3435 return -EBUSY;
3445 unsigned long long recovery_start = rdev->recovery_offset;
3447 if (test_bit(In_sync, &rdev->flags) ||
3461 return -EINVAL;
3463 if (rdev->mddev->pers &&
3464 rdev->raid_disk >= 0)
3465 return -EBUSY;
3467 rdev->recovery_offset = recovery_start;
3469 set_bit(In_sync, &rdev->flags);
3471 clear_bit(In_sync, &rdev->flags);
3478 /* sysfs access to bad-blocks list.
3480 * 'bad-blocks' lists sector numbers and lengths of ranges that
3482 * the one-page limit of sysfs.
3485 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
3491 return badblocks_show(&rdev->badblocks, page, 0);
3495 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3497 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3498 wake_up(&rdev->blocked_wait);
3506 return badblocks_show(&rdev->badblocks, page, 1);
3510 return badblocks_store(&rdev->badblocks, page, len, 1);
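On the text format behind these four helpers (an assumption from the badblocks core, which this listing does not show): each recorded range is one "start_sector length" line, writing a pair in the same form records a new range, and the two attribute variants differ only in whether acknowledged ranges are included, matching the comments at 3480 and 3485 above.

/* Assumed bad_blocks attribute format, e.g.:
 *   2048 8
 *   4196352 16
 */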
3518 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3527 return -EINVAL;
3529 return -EINVAL;
3531 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3532 rdev->raid_disk >= 0)
3533 return -EBUSY;
3535 if (rdev->mddev->persistent) {
3536 if (rdev->mddev->major_version == 0)
3537 return -EINVAL;
3538 if ((sector > rdev->sb_start &&
3539 sector - rdev->sb_start > S16_MAX) ||
3540 (sector < rdev->sb_start &&
3541 rdev->sb_start - sector > -S16_MIN))
3542 return -EINVAL;
3543 rdev->ppl.offset = sector - rdev->sb_start;
3544 } else if (!rdev->mddev->external) {
3545 return -EBUSY;
3547 rdev->ppl.sector = sector;
3557 return sprintf(page, "%u\n", rdev->ppl.size);
3566 return -EINVAL;
3568 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3569 rdev->raid_disk >= 0)
3570 return -EBUSY;
3572 if (rdev->mddev->persistent) {
3573 if (rdev->mddev->major_version == 0)
3574 return -EINVAL;
3576 return -EINVAL;
3577 } else if (!rdev->mddev->external) {
3578 return -EBUSY;
3580 rdev->ppl.size = size;
3608 if (!entry->show)
3609 return -EIO;
3610 if (!rdev->mddev)
3611 return -ENODEV;
3612 return entry->show(rdev, page);
3623 struct mddev *mddev = rdev->mddev;
3625 if (!entry->store)
3626 return -EIO;
3628 return -EACCES;
3630 if (entry->store == state_store && cmd_match(page, "remove"))
3633 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3635 if (rdev->mddev == NULL)
3636 rv = -ENODEV;
3638 rv = entry->store(rdev, page, length);
3665 rdev->desc_nr = -1;
3666 rdev->saved_raid_disk = -1;
3667 rdev->raid_disk = -1;
3668 rdev->flags = 0;
3669 rdev->data_offset = 0;
3670 rdev->new_data_offset = 0;
3671 rdev->sb_events = 0;
3672 rdev->last_read_error = 0;
3673 rdev->sb_loaded = 0;
3674 rdev->bb_page = NULL;
3675 atomic_set(&rdev->nr_pending, 0);
3676 atomic_set(&rdev->read_errors, 0);
3677 atomic_set(&rdev->corrected_errors, 0);
3679 INIT_LIST_HEAD(&rdev->same_set);
3680 init_waitqueue_head(&rdev->blocked_wait);
3684 * be used - I wonder if that matters
3686 return badblocks_init(&rdev->badblocks, 0);
3695 * - the device is nonexistent (zero size)
3696 * - the device has no valid superblock
3698 * a faulty rdev _never_ has rdev->sb set.
3709 return ERR_PTR(-ENOMEM);
3718 if (super_format == -2) {
3722 set_bit(Holder, &rdev->flags);
3725 rdev->bdev = blkdev_get_by_dev(newdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
3727 if (IS_ERR(rdev->bdev)) {
3728 pr_warn("md: could not open device unknown-block(%u,%u).\n",
3730 err = PTR_ERR(rdev->bdev);
3734 kobject_init(&rdev->kobj, &rdev_ktype);
3736 size = bdev_nr_bytes(rdev->bdev) >> BLOCK_SIZE_BITS;
3739 rdev->bdev);
3740 err = -EINVAL;
3747 if (err == -EINVAL) {
3749 rdev->bdev,
3755 rdev->bdev);
3763 blkdev_put(rdev->bdev, holder);
3782 switch (super_types[mddev->major_version].
3783 load_super(rdev, freshest, mddev->minor_version)) {
3790 pr_warn("md: fatal superblock inconsistency in %pg -- removing from array\n",
3791 rdev->bdev);
3798 return -EINVAL;
3801 super_types[mddev->major_version].
3806 if (mddev->max_disks &&
3807 (rdev->desc_nr >= mddev->max_disks ||
3808 i > mddev->max_disks)) {
3810 mdname(mddev), rdev->bdev,
3811 mddev->max_disks);
3816 if (super_types[mddev->major_version].
3818 pr_warn("md: kicking non-fresh %pg from array!\n",
3819 rdev->bdev);
3824 if (mddev->level == LEVEL_MULTIPATH) {
3825 rdev->desc_nr = i++;
3826 rdev->raid_disk = rdev->desc_nr;
3827 set_bit(In_sync, &rdev->flags);
3828 } else if (rdev->raid_disk >=
3829 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3830 !test_bit(Journal, &rdev->flags)) {
3831 rdev->raid_disk = -1;
3832 clear_bit(In_sync, &rdev->flags);
3839 /* Read a fixed-point number.
3847 * all without any floating-point arithmetic.
3852 long decimals = -1;
3858 value = *cp - '0';
3868 return -EINVAL;
3871 *res = result * int_pow(10, scale - decimals);
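/* Worked example: parsing "0.13" with scale=3 (milliseconds) accumulates
 * result=13 with decimals=2, so *res = 13 * 10^(3-2) = 130.
 */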
3878 unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ;
3889 return -EINVAL;
3893 return -EINVAL;
3895 mddev->safemode_delay = 0;
3897 unsigned long old_delay = mddev->safemode_delay;
3902 mddev->safemode_delay = new_delay;
3904 mod_timer(&mddev->safemode_timer, jiffies+1);
3916 spin_lock(&mddev->lock);
3917 p = mddev->pers;
3919 ret = sprintf(page, "%s\n", p->name);
3920 else if (mddev->clevel[0])
3921 ret = sprintf(page, "%s\n", mddev->clevel);
3922 else if (mddev->level != LEVEL_NONE)
3923 ret = sprintf(page, "%d\n", mddev->level);
3926 spin_unlock(&mddev->lock);
3942 return -EINVAL;
3948 if (mddev->pers == NULL) {
3949 strncpy(mddev->clevel, buf, slen);
3950 if (mddev->clevel[slen-1] == '\n')
3951 slen--;
3952 mddev->clevel[slen] = 0;
3953 mddev->level = LEVEL_NONE;
3957 rv = -EROFS;
3962 * - array is not engaged in resync/recovery/reshape
3963 * - old personality can be suspended
3964 * - new personality will access other array.
3967 rv = -EBUSY;
3968 if (mddev->sync_thread ||
3969 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3970 mddev->reshape_position != MaxSector ||
3971 mddev->sysfs_active)
3974 rv = -EINVAL;
3975 if (!mddev->pers->quiesce) {
3977 mdname(mddev), mddev->pers->name);
3983 if (clevel[slen-1] == '\n')
3984 slen--;
3989 if (request_module("md-%s", clevel) != 0)
3990 request_module("md-level-%s", clevel);
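/* Personality modules provide aliases in both forms, e.g. raid5.ko
 * declares MODULE_ALIAS("md-raid5") and MODULE_ALIAS("md-level-5"),
 * so either request_module() call can load it.
 */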
3993 if (!pers || !try_module_get(pers->owner)) {
3996 rv = -EINVAL;
4001 if (pers == mddev->pers) {
4003 module_put(pers->owner);
4007 if (!pers->takeover) {
4008 module_put(pers->owner);
4011 rv = -EINVAL;
4016 rdev->new_raid_disk = rdev->raid_disk;
4018 /* ->takeover must set new_* and/or delta_disks
4021 priv = pers->takeover(mddev);
4023 mddev->new_level = mddev->level;
4024 mddev->new_layout = mddev->layout;
4025 mddev->new_chunk_sectors = mddev->chunk_sectors;
4026 mddev->raid_disks -= mddev->delta_disks;
4027 mddev->delta_disks = 0;
4028 mddev->reshape_backwards = 0;
4029 module_put(pers->owner);
4040 spin_lock(&mddev->lock);
4041 oldpers = mddev->pers;
4042 oldpriv = mddev->private;
4043 mddev->pers = pers;
4044 mddev->private = priv;
4045 strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4046 mddev->level = mddev->new_level;
4047 mddev->layout = mddev->new_layout;
4048 mddev->chunk_sectors = mddev->new_chunk_sectors;
4049 mddev->delta_disks = 0;
4050 mddev->reshape_backwards = 0;
4051 mddev->degraded = 0;
4052 spin_unlock(&mddev->lock);
4054 if (oldpers->sync_request == NULL &&
4055 mddev->external) {
4056 /* We are converting from a no-redundancy array
4060 * clean->dirty
4063 mddev->in_sync = 0;
4064 mddev->safemode_delay = 0;
4065 mddev->safemode = 0;
4068 oldpers->free(mddev, oldpriv);
4070 if (oldpers->sync_request == NULL &&
4071 pers->sync_request != NULL) {
4073 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4076 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4077 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4078 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4080 if (oldpers->sync_request != NULL &&
4081 pers->sync_request == NULL) {
4083 if (mddev->to_remove == NULL)
4084 mddev->to_remove = &md_redundancy_group;
4087 module_put(oldpers->owner);
4090 if (rdev->raid_disk < 0)
4092 if (rdev->new_raid_disk >= mddev->raid_disks)
4093 rdev->new_raid_disk = -1;
4094 if (rdev->new_raid_disk == rdev->raid_disk)
4099 if (rdev->raid_disk < 0)
4101 if (rdev->new_raid_disk == rdev->raid_disk)
4103 rdev->raid_disk = rdev->new_raid_disk;
4104 if (rdev->raid_disk < 0)
4105 clear_bit(In_sync, &rdev->flags);
4109 rdev->raid_disk, mdname(mddev));
4113 if (pers->sync_request == NULL) {
4117 mddev->in_sync = 1;
4118 del_timer_sync(&mddev->safemode_timer);
4120 blk_set_stacking_limits(&mddev->queue->limits);
4121 pers->run(mddev);
4122 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4124 if (!mddev->thread)
4126 sysfs_notify_dirent_safe(mddev->sysfs_level);
4141 if (mddev->reshape_position != MaxSector &&
4142 mddev->layout != mddev->new_layout)
4144 mddev->new_layout, mddev->layout);
4145 return sprintf(page, "%d\n", mddev->layout);
4161 if (mddev->pers) {
4162 if (mddev->pers->check_reshape == NULL)
4163 err = -EBUSY;
4165 err = -EROFS;
4167 mddev->new_layout = n;
4168 err = mddev->pers->check_reshape(mddev);
4170 mddev->new_layout = mddev->layout;
4173 mddev->new_layout = n;
4174 if (mddev->reshape_position == MaxSector)
4175 mddev->layout = n;
4186 if (mddev->raid_disks == 0)
4188 if (mddev->reshape_position != MaxSector &&
4189 mddev->delta_disks != 0)
4190 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4191 mddev->raid_disks - mddev->delta_disks);
4192 return sprintf(page, "%d\n", mddev->raid_disks);
4210 if (mddev->pers)
4212 else if (mddev->reshape_position != MaxSector) {
4214 int olddisks = mddev->raid_disks - mddev->delta_disks;
4216 err = -EINVAL;
4219 rdev->data_offset < rdev->new_data_offset)
4222 rdev->data_offset > rdev->new_data_offset)
4226 mddev->delta_disks = n - olddisks;
4227 mddev->raid_disks = n;
4228 mddev->reshape_backwards = (mddev->delta_disks < 0);
4230 mddev->raid_disks = n;
4241 return sprintf(page, "%pU\n", mddev->uuid);
4249 if (mddev->reshape_position != MaxSector &&
4250 mddev->chunk_sectors != mddev->new_chunk_sectors)
4252 mddev->new_chunk_sectors << 9,
4253 mddev->chunk_sectors << 9);
4254 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4270 if (mddev->pers) {
4271 if (mddev->pers->check_reshape == NULL)
4272 err = -EBUSY;
4274 err = -EROFS;
4276 mddev->new_chunk_sectors = n >> 9;
4277 err = mddev->pers->check_reshape(mddev);
4279 mddev->new_chunk_sectors = mddev->chunk_sectors;
4282 mddev->new_chunk_sectors = n >> 9;
4283 if (mddev->reshape_position == MaxSector)
4284 mddev->chunk_sectors = n >> 9;
4295 if (mddev->recovery_cp == MaxSector)
4297 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4313 return -EINVAL;
4319 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4320 err = -EBUSY;
4323 mddev->recovery_cp = n;
4324 if (mddev->pers)
4325 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4350 * read-auto
4353 * clean - no pending writes, but otherwise active.
4357 * if not known, block and switch to write-pending
4363 * write-pending
4366 * active-idle
4370 * Array is failed. It's useful because mounted arrays aren't stopped
4377 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4378 "write-pending", "active-idle", "broken", NULL };
4394 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4395 switch(mddev->ro) {
4403 spin_lock(&mddev->lock);
4404 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4406 else if (mddev->in_sync)
4408 else if (mddev->safemode)
4412 spin_unlock(&mddev->lock);
4415 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4418 if (list_empty(&mddev->disks) &&
4419 mddev->raid_disks == 0 &&
4420 mddev->dev_sectors == 0)
4438 if (mddev->pers && (st == active || st == clean) &&
4439 mddev->ro != MD_RDONLY) {
4443 spin_lock(&mddev->lock);
4446 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4447 md_wakeup_thread(mddev->thread);
4448 wake_up(&mddev->sb_wait);
4452 err = -EBUSY;
4455 sysfs_notify_dirent_safe(mddev->sysfs_state);
4456 spin_unlock(&mddev->lock);
4462 err = -EINVAL;
4472 if (mddev->pers)
4480 if (mddev->pers)
4483 mddev->ro = MD_RDONLY;
4484 set_disk_ro(mddev->gendisk, 1);
4489 if (mddev->pers) {
4492 else if (mddev->ro == MD_RDONLY)
4495 mddev->ro = MD_AUTO_READ;
4496 set_disk_ro(mddev->gendisk, 0);
4499 mddev->ro = MD_AUTO_READ;
4504 if (mddev->pers) {
4508 spin_lock(&mddev->lock);
4510 err = -EBUSY;
4511 spin_unlock(&mddev->lock);
4513 err = -EINVAL;
4516 if (mddev->pers) {
4520 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4521 wake_up(&mddev->sb_wait);
4524 mddev->ro = MD_RDWR;
4525 set_disk_ro(mddev->gendisk, 0);
4537 if (mddev->hold_active == UNTIL_IOCTL)
4538 mddev->hold_active = 0;
4539 sysfs_notify_dirent_safe(mddev->sysfs_state);
4550 atomic_read(&mddev->max_corr_read_errors));
4563 return -EINVAL;
4564 atomic_set(&mddev->max_corr_read_errors, n);
4575 return -EINVAL;
4596 return -EINVAL;
4599 return -EINVAL;
4603 return -EOVERFLOW;
4608 if (mddev->persistent) {
4609 rdev = md_import_device(dev, mddev->major_version,
4610 mddev->minor_version);
4611 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4613 = list_entry(mddev->disks.next,
4615 err = super_types[mddev->major_version]
4616 .load_super(rdev, rdev0, mddev->minor_version);
4620 } else if (mddev->external)
4621 rdev = md_import_device(dev, -2, -1);
4623 rdev = md_import_device(dev, -1, -1);
4652 if (!mddev->bitmap)
4654 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
4658 if (*end == '-') { /* range */
4664 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4667 md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
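/* Writes arrive here via the 'bitmap_set_bits' sysfs attribute; a usage
 * sketch, assuming an array named md0:
 *   echo "100-200" > /sys/block/md0/md/bitmap_set_bits
 * marks bitmap chunks 100 through 200 dirty.
 */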
4680 (unsigned long long)mddev->dev_sectors / 2);
4690 * If array is active, we can try an on-line resize
4700 if (mddev->pers) {
4705 if (mddev->dev_sectors == 0 ||
4706 mddev->dev_sectors > sectors)
4707 mddev->dev_sectors = sectors;
4709 err = -ENOSPC;
4727 if (mddev->persistent)
4729 mddev->major_version, mddev->minor_version);
4730 else if (mddev->external)
4731 return sprintf(page, "external:%s\n", mddev->metadata_type);
4750 err = -EBUSY;
4751 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4753 else if (!list_empty(&mddev->disks))
4758 mddev->persistent = 0;
4759 mddev->external = 0;
4760 mddev->major_version = 0;
4761 mddev->minor_version = 90;
4765 size_t namelen = len-9;
4766 if (namelen >= sizeof(mddev->metadata_type))
4767 namelen = sizeof(mddev->metadata_type)-1;
4768 strncpy(mddev->metadata_type, buf+9, namelen);
4769 mddev->metadata_type[namelen] = 0;
4770 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4771 mddev->metadata_type[--namelen] = 0;
4772 mddev->persistent = 0;
4773 mddev->external = 1;
4774 mddev->major_version = 0;
4775 mddev->minor_version = 90;
4779 err = -EINVAL;
4786 err = -ENOENT;
4789 mddev->major_version = major;
4790 mddev->minor_version = minor;
4791 mddev->persistent = 1;
4792 mddev->external = 0;
4806 unsigned long recovery = mddev->recovery;
4822 else if (mddev->reshape_position != MaxSector)
4830 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4840 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4845 if (work_pending(&mddev->sync_work))
4848 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4853 md_wakeup_thread_directly(mddev->sync_thread);
4860 int sync_seq = atomic_read(&mddev->sync_seq);
4862 mutex_lock(&mddev->sync_mutex);
4863 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4866 wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
4867 !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
4869 mutex_unlock(&mddev->sync_mutex);
4874 mutex_lock(&mddev->sync_mutex);
4875 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4878 wait_event(resync_wait, mddev->sync_thread == NULL &&
4879 !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
4881 mutex_unlock(&mddev->sync_mutex);
4887 if (!mddev->pers || !mddev->pers->sync_request)
4888 return -EINVAL;
4895 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4896 return -EBUSY;
4898 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4900 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4901 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4904 if (mddev->pers->start_reshape == NULL)
4905 return -EINVAL;
4908 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
4909 err = -EBUSY;
4910 } else if (mddev->reshape_position == MaxSector ||
4911 mddev->pers->check_reshape == NULL ||
4912 mddev->pers->check_reshape(mddev)) {
4913 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4914 err = mddev->pers->start_reshape(mddev);
4922 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4928 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4931 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4933 return -EINVAL;
4934 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4935 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4936 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4938 if (mddev->ro == MD_AUTO_READ) {
4940 * canceling read-auto mode
4942 mddev->ro = MD_RDWR;
4943 md_wakeup_thread(mddev->sync_thread);
4945 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4946 md_wakeup_thread(mddev->thread);
4947 sysfs_notify_dirent_safe(mddev->sysfs_action);
4957 return sprintf(page, "%s\n", mddev->last_sync_action);
4967 atomic64_read(&mddev->resync_mismatches));
4976 mddev->sync_speed_min ? "local": "system");
4992 return -EINVAL;
4994 mddev->sync_speed_min = min;
5005 mddev->sync_speed_max ? "local": "system");
5021 return -EINVAL;
5023 mddev->sync_speed_max = max;
5033 return sprintf(page, "%d\n", mddev->degraded);
5040 return sprintf(page, "%d\n", mddev->parallel_resync);
5049 return -EINVAL;
5052 return -EINVAL;
5054 mddev->parallel_resync = n;
5056 if (mddev->sync_thread)
5071 if (mddev->curr_resync == MD_RESYNC_NONE)
5073 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5074 dt = (jiffies - mddev->resync_mark) / HZ;
5076 db = resync - mddev->resync_mark_cnt;
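/* dt is the elapsed time in seconds and db the sectors synced since the
 * last mark, so the value reported is db/dt/2, i.e. KiB per second.
 */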
5087 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5090 if (mddev->curr_resync == MD_RESYNC_YIELDED ||
5091 mddev->curr_resync == MD_RESYNC_DELAYED)
5094 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5095 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5096 max_sectors = mddev->resync_max_sectors;
5098 max_sectors = mddev->dev_sectors;
5100 resync = mddev->curr_resync_completed;
5111 (unsigned long long)mddev->resync_min);
5120 return -EINVAL;
5122 spin_lock(&mddev->lock);
5123 err = -EINVAL;
5124 if (min > mddev->resync_max)
5127 err = -EBUSY;
5128 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5132 mddev->resync_min = round_down(min, 8);
5136 spin_unlock(&mddev->lock);
5146 if (mddev->resync_max == MaxSector)
5150 (unsigned long long)mddev->resync_max);
5156 spin_lock(&mddev->lock);
5158 mddev->resync_max = MaxSector;
5163 err = -EINVAL;
5166 if (max < mddev->resync_min)
5169 err = -EBUSY;
5170 if (max < mddev->resync_max && md_is_rdwr(mddev) &&
5171 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5175 chunk = mddev->chunk_sectors;
5179 err = -EINVAL;
5183 mddev->resync_max = max;
5185 wake_up(&mddev->recovery_wait);
5188 spin_unlock(&mddev->lock);
5198 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5211 return -EINVAL;
5216 err = -EINVAL;
5217 if (mddev->pers == NULL ||
5218 mddev->pers->quiesce == NULL)
5221 mddev->suspend_lo = new;
5235 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5248 return -EINVAL;
5253 err = -EINVAL;
5254 if (mddev->pers == NULL)
5258 mddev->suspend_hi = new;
5272 if (mddev->reshape_position != MaxSector)
5274 (unsigned long long)mddev->reshape_position);
5290 return -EINVAL;
5294 err = -EBUSY;
5295 if (mddev->pers)
5297 mddev->reshape_position = new;
5298 mddev->delta_disks = 0;
5299 mddev->reshape_backwards = 0;
5300 mddev->new_level = mddev->level;
5301 mddev->new_layout = mddev->layout;
5302 mddev->new_chunk_sectors = mddev->chunk_sectors;
5304 rdev->new_data_offset = rdev->data_offset;
5319 mddev->reshape_backwards ? "backwards" : "forwards");
5333 return -EINVAL;
5334 if (mddev->reshape_backwards == backwards)
5341 if (mddev->delta_disks)
5342 err = -EBUSY;
5343 else if (mddev->persistent &&
5344 mddev->major_version == 0)
5345 err = -EINVAL;
5347 mddev->reshape_backwards = backwards;
5359 if (mddev->external_size)
5361 (unsigned long long)mddev->array_sectors/2);
5379 return -EINVAL;
5383 if (mddev->pers)
5384 sectors = mddev->pers->size(mddev, 0, 0);
5386 sectors = mddev->array_sectors;
5388 mddev->external_size = 0;
5391 err = -EINVAL;
5392 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5393 err = -E2BIG;
5395 mddev->external_size = 1;
5399 mddev->array_sectors = sectors;
5400 if (mddev->pers)
5401 set_capacity_and_notify(mddev->gendisk,
5402 mddev->array_sectors);
5417 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5419 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5421 } else if (mddev->bitmap) {
5423 } else if (mddev->pers) {
5424 if (mddev->pers->sync_request)
5440 if (mddev->pers) {
5441 if (mddev->pers->change_consistency_policy)
5442 err = mddev->pers->change_consistency_policy(mddev, buf);
5444 err = -EBUSY;
5445 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5446 set_bit(MD_HAS_PPL, &mddev->flags);
5448 err = -EINVAL;
5460 return sprintf(page, "%d\n", mddev->fail_last_dev);
5477 if (value != mddev->fail_last_dev)
5478 mddev->fail_last_dev = value;
5488 if (mddev->pers == NULL || (mddev->pers->level != 1))
5491 return sprintf(page, "%d\n", mddev->serialize_policy);
5508 if (value == mddev->serialize_policy)
5514 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5516 err = -EINVAL;
5525 mddev->serialize_policy = value;
5598 if (!entry->show)
5599 return -EIO;
5603 return -EBUSY;
5607 rv = entry->show(mddev, page);
5620 if (!entry->store)
5621 return -EIO;
5623 return -EACCES;
5627 return -EBUSY;
5630 rv = entry->store(mddev, page, length);
5639 if (mddev->sysfs_state)
5640 sysfs_put(mddev->sysfs_state);
5641 if (mddev->sysfs_level)
5642 sysfs_put(mddev->sysfs_level);
5644 del_gendisk(mddev->gendisk);
5645 put_disk(mddev->gendisk);
5664 kobject_put(&mddev->kobj);
5671 if (mddev->writes_pending.percpu_count_ptr)
5673 if (percpu_ref_init(&mddev->writes_pending, no_op,
5675 return -ENOMEM;
5677 percpu_ref_put(&mddev->writes_pending);
5687 * If dev is non-zero it must be a device number with a MAJOR of
5714 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5716 unit = MINOR(mddev->unit) >> shift;
5725 if (mddev2->gendisk &&
5726 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5728 error = -EEXIST;
5737 mddev->hold_active = UNTIL_STOP;
5739 error = -ENOMEM;
5744 disk->major = MAJOR(mddev->unit);
5745 disk->first_minor = unit << shift;
5746 disk->minors = 1 << shift;
5748 strcpy(disk->disk_name, name);
5750 sprintf(disk->disk_name, "md_d%d", unit);
5752 sprintf(disk->disk_name, "md%d", unit);
5753 disk->fops = &md_fops;
5754 disk->private_data = mddev;
5756 mddev->queue = disk->queue;
5757 blk_set_stacking_limits(&mddev->queue->limits);
5758 blk_queue_write_cache(mddev->queue, true, true);
5759 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5760 mddev->gendisk = disk;
5765 kobject_init(&mddev->kobj, &md_ktype);
5766 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5773 mddev->hold_active = 0;
5779 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5780 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5781 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5825 while (len && val[len-1] == '\n')
5826 len--;
5828 return -E2BIG;
5838 return -EINVAL;
5845 mddev->safemode = 1;
5846 if (mddev->external)
5847 sysfs_notify_dirent_safe(mddev->sysfs_state);
5849 md_wakeup_thread(mddev->thread);
5857 wake_up(&mddev->sb_wait);
5867 if (list_empty(&mddev->disks))
5869 return -EINVAL;
5871 if (mddev->pers)
5872 return -EBUSY;
5874 if (mddev->sysfs_active)
5875 return -EBUSY;
5880 if (!mddev->raid_disks) {
5881 if (!mddev->persistent)
5882 return -EINVAL;
5885 return -EINVAL;
5888 if (mddev->level != LEVEL_NONE)
5889 request_module("md-level-%d", mddev->level);
5890 else if (mddev->clevel[0])
5891 request_module("md-%s", mddev->clevel);
5898 mddev->has_superblocks = false;
5900 if (test_bit(Faulty, &rdev->flags))
5902 sync_blockdev(rdev->bdev);
5903 invalidate_bdev(rdev->bdev);
5904 if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
5905 mddev->ro = MD_RDONLY;
5906 if (mddev->gendisk)
5907 set_disk_ro(mddev->gendisk, 1);
5910 if (rdev->sb_page)
5911 mddev->has_superblocks = true;
5917 if (rdev->meta_bdev) {
5919 } else if (rdev->data_offset < rdev->sb_start) {
5920 if (mddev->dev_sectors &&
5921 rdev->data_offset + mddev->dev_sectors
5922 > rdev->sb_start) {
5925 return -EINVAL;
5928 if (rdev->sb_start + rdev->sb_size/512
5929 > rdev->data_offset) {
5932 return -EINVAL;
5935 sysfs_notify_dirent_safe(rdev->sysfs_state);
5936 nowait = nowait && bdev_nowait(rdev->bdev);
5939 err = percpu_ref_init(&mddev->active_io, active_io_release,
5944 if (!bioset_initialized(&mddev->bio_set)) {
5945 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5949 if (!bioset_initialized(&mddev->sync_set)) {
5950 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5955 if (!bioset_initialized(&mddev->io_clone_set)) {
5956 err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
5963 pers = find_pers(mddev->level, mddev->clevel);
5964 if (!pers || !try_module_get(pers->owner)) {
5966 if (mddev->level != LEVEL_NONE)
5968 mddev->level);
5971 mddev->clevel);
5972 err = -EINVAL;
5976 if (mddev->level != pers->level) {
5977 mddev->level = pers->level;
5978 mddev->new_level = pers->level;
5980 strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5982 if (mddev->reshape_position != MaxSector &&
5983 pers->start_reshape == NULL) {
5985 module_put(pers->owner);
5986 err = -EINVAL;
5990 if (pers->sync_request) {
6000 rdev->bdev->bd_disk ==
6001 rdev2->bdev->bd_disk) {
6004 rdev->bdev,
6005 rdev2->bdev);
6011 pr_warn("True protection against single-disk failure might be compromised.\n");
6014 mddev->recovery = 0;
6015 /* may be overridden by personality */
6016 mddev->resync_max_sectors = mddev->dev_sectors;
6018 mddev->ok_start_degraded = start_dirty_degraded;
6021 mddev->ro = MD_AUTO_READ; /* read-only, but switch on first write */
6023 err = pers->run(mddev);
6025 pr_warn("md: pers->run() failed ...\n");
6026 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
6027 WARN_ONCE(!mddev->external_size,
6031 (unsigned long long)mddev->array_sectors / 2,
6032 (unsigned long long)pers->size(mddev, 0, 0) / 2);
6033 err = -EINVAL;
6035 if (err == 0 && pers->sync_request &&
6036 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
6039 bitmap = md_bitmap_create(mddev, -1);
6045 mddev->bitmap = bitmap;
6051 if (mddev->bitmap_info.max_write_behind > 0) {
6055 if (test_bit(WriteMostly, &rdev->flags) &&
6059 if (create_pool && mddev->serial_info_pool == NULL) {
6060 mddev->serial_info_pool =
6063 if (!mddev->serial_info_pool) {
6064 err = -ENOMEM;
6070 if (mddev->queue) {
6074 if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
6079 if (mddev->degraded)
6082 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6084 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6085 blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
6089 blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
6091 if (pers->sync_request) {
6092 if (mddev->kobj.sd &&
6093 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6096 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6097 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6098 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6099 } else if (mddev->ro == MD_AUTO_READ)
6100 mddev->ro = MD_RDWR;
6102 atomic_set(&mddev->max_corr_read_errors,
6104 mddev->safemode = 0;
6106 mddev->safemode_delay = 0;
6108 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6109 mddev->in_sync = 1;
6111 spin_lock(&mddev->lock);
6112 mddev->pers = pers;
6113 spin_unlock(&mddev->lock);
6115 if (rdev->raid_disk >= 0)
6118 if (mddev->degraded && md_is_rdwr(mddev))
6120 * via sysfs - until a lack of spares is confirmed.
6122 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6123 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6125 if (mddev->sb_flags)
6133 if (mddev->private)
6134 pers->free(mddev, mddev->private);
6135 mddev->private = NULL;
6136 module_put(pers->owner);
6139 bioset_exit(&mddev->io_clone_set);
6141 bioset_exit(&mddev->sync_set);
6143 bioset_exit(&mddev->bio_set);
6145 percpu_ref_exit(&mddev->active_io);
6154 set_bit(MD_NOT_READY, &mddev->flags);
6170 md_wakeup_thread(mddev->thread);
6171 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
6173 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6174 clear_bit(MD_NOT_READY, &mddev->flags);
6175 mddev->changed = 1;
6176 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6177 sysfs_notify_dirent_safe(mddev->sysfs_state);
6178 sysfs_notify_dirent_safe(mddev->sysfs_action);
6179 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6181 clear_bit(MD_NOT_READY, &mddev->flags);
6189 if (mddev->pers->start) {
6190 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6191 md_wakeup_thread(mddev->thread);
6192 ret = mddev->pers->start(mddev);
6193 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6194 md_wakeup_thread(mddev->sync_thread);
6202 struct gendisk *disk = mddev->gendisk;
6208 if (list_empty(&mddev->disks))
6209 return -ENXIO;
6210 if (!mddev->pers)
6211 return -EINVAL;
6213 return -EBUSY;
6217 if (test_bit(Journal, &rdev->flags) &&
6218 !test_bit(Faulty, &rdev->flags))
6224 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6226 return -EINVAL;
6228 return -EROFS;
6230 mddev->safemode = 0;
6231 mddev->ro = MD_RDWR;
6233 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6235 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6236 md_wakeup_thread(mddev->thread);
6237 md_wakeup_thread(mddev->sync_thread);
6238 sysfs_notify_dirent_safe(mddev->sysfs_state);
6244 mddev->array_sectors = 0;
6245 mddev->external_size = 0;
6246 mddev->dev_sectors = 0;
6247 mddev->raid_disks = 0;
6248 mddev->recovery_cp = 0;
6249 mddev->resync_min = 0;
6250 mddev->resync_max = MaxSector;
6251 mddev->reshape_position = MaxSector;
6252 /* we still need mddev->external in export_rdev, do not clear it yet */
6253 mddev->persistent = 0;
6254 mddev->level = LEVEL_NONE;
6255 mddev->clevel[0] = 0;
6261 if (mddev->hold_active)
6262 mddev->flags = 0;
6264 mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
6265 mddev->sb_flags = 0;
6266 mddev->ro = MD_RDWR;
6267 mddev->metadata_type[0] = 0;
6268 mddev->chunk_sectors = 0;
6269 mddev->ctime = mddev->utime = 0;
6270 mddev->layout = 0;
6271 mddev->max_disks = 0;
6272 mddev->events = 0;
6273 mddev->can_decrease_events = 0;
6274 mddev->delta_disks = 0;
6275 mddev->reshape_backwards = 0;
6276 mddev->new_level = LEVEL_NONE;
6277 mddev->new_layout = 0;
6278 mddev->new_chunk_sectors = 0;
6279 mddev->curr_resync = MD_RESYNC_NONE;
6280 atomic64_set(&mddev->resync_mismatches, 0);
6281 mddev->suspend_lo = mddev->suspend_hi = 0;
6282 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6283 mddev->recovery = 0;
6284 mddev->in_sync = 0;
6285 mddev->changed = 0;
6286 mddev->degraded = 0;
6287 mddev->safemode = 0;
6288 mddev->private = NULL;
6289 mddev->cluster_info = NULL;
6290 mddev->bitmap_info.offset = 0;
6291 mddev->bitmap_info.default_offset = 0;
6292 mddev->bitmap_info.default_space = 0;
6293 mddev->bitmap_info.chunksize = 0;
6294 mddev->bitmap_info.daemon_sleep = 0;
6295 mddev->bitmap_info.max_write_behind = 0;
6296 mddev->bitmap_info.nodes = 0;
6301 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6302 if (work_pending(&mddev->sync_work))
6304 if (mddev->sync_thread) {
6305 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6309 del_timer_sync(&mddev->safemode_timer);
6311 if (mddev->pers && mddev->pers->quiesce) {
6312 mddev->pers->quiesce(mddev, 1);
6313 mddev->pers->quiesce(mddev, 0);
6318 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6319 mddev->sb_flags)) {
6322 mddev->in_sync = 1;
6326 mddev->serialize_policy = 0;
6341 if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
6342 mddev->pers->quiesce(mddev, 1);
6343 mddev->pers->quiesce(mddev, 0);
6345 md_unregister_thread(mddev, &mddev->thread);
6346 if (mddev->queue)
6347 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
6352 struct md_personality *pers = mddev->pers;
6355 /* Ensure ->event_work is done */
6356 if (mddev->event_work.func)
6358 spin_lock(&mddev->lock);
6359 mddev->pers = NULL;
6360 spin_unlock(&mddev->lock);
6361 if (mddev->private)
6362 pers->free(mddev, mddev->private);
6363 mddev->private = NULL;
6364 if (pers->sync_request && mddev->to_remove == NULL)
6365 mddev->to_remove = &md_redundancy_group;
6366 module_put(pers->owner);
6367 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6369 percpu_ref_exit(&mddev->active_io);
6370 bioset_exit(&mddev->bio_set);
6371 bioset_exit(&mddev->sync_set);
6372 bioset_exit(&mddev->io_clone_set);
6377 lockdep_assert_held(&mddev->reconfig_mutex);
6380 * This is called from dm-raid
6384 percpu_ref_exit(&mddev->writes_pending);
6394 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6395 return -EBUSY;
6397 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6399 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6400 md_wakeup_thread(mddev->thread);
6402 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6403 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6409 md_wakeup_thread_directly(mddev->sync_thread);
6413 &mddev->recovery));
6414 wait_event(mddev->sb_wait,
6415 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6418 mutex_lock(&mddev->open_mutex);
6419 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6420 mddev->sync_thread ||
6421 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6423 err = -EBUSY;
6427 if (mddev->pers) {
6430 if (mddev->ro == MD_RDONLY) {
6431 err = -ENXIO;
6435 mddev->ro = MD_RDONLY;
6436 set_disk_ro(mddev->gendisk, 1);
6440 if ((mddev->pers && !err) || did_freeze) {
6441 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6442 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6443 md_wakeup_thread(mddev->thread);
6444 sysfs_notify_dirent_safe(mddev->sysfs_state);
6447 mutex_unlock(&mddev->open_mutex);
6452 * 0 - completely stop and disassemble array
6453 * 2 - stop but do not disassemble array
6458 struct gendisk *disk = mddev->gendisk;
6462 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6464 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6465 md_wakeup_thread(mddev->thread);
6467 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6468 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6474 md_wakeup_thread_directly(mddev->sync_thread);
6477 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6479 &mddev->recovery)));
6482 mutex_lock(&mddev->open_mutex);
6483 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6484 mddev->sysfs_active ||
6485 mddev->sync_thread ||
6486 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6488 mutex_unlock(&mddev->open_mutex);
6490 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6491 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6492 md_wakeup_thread(mddev->thread);
6494 return -EBUSY;
6496 if (mddev->pers) {
6504 sysfs_notify_dirent_safe(mddev->sysfs_state);
6507 if (rdev->raid_disk >= 0)
6511 mutex_unlock(&mddev->open_mutex);
6512 mddev->changed = 1;
6515 mddev->ro = MD_RDWR;
6517 mutex_unlock(&mddev->open_mutex);
6524 if (mddev->bitmap_info.file) {
6525 struct file *f = mddev->bitmap_info.file;
6526 spin_lock(&mddev->lock);
6527 mddev->bitmap_info.file = NULL;
6528 spin_unlock(&mddev->lock);
6531 mddev->bitmap_info.offset = 0;
6536 if (mddev->hold_active == UNTIL_STOP)
6537 mddev->hold_active = 0;
6540 sysfs_notify_dirent_safe(mddev->sysfs_state);
6550 if (list_empty(&mddev->disks))
6556 pr_cont("<%pg>", rdev->bdev);
6592 pr_debug("md: considering %pg ...\n", rdev0->bdev);
6597 rdev->bdev);
6598 list_move(&rdev->same_set, &candidates);
6607 rdev0->preferred_minor << MdpMinorShift);
6610 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6613 if (rdev0->preferred_minor != unit) {
6615 rdev0->bdev, rdev0->preferred_minor);
6625 else if (mddev->raid_disks || mddev->major_version
6626 || !list_empty(&mddev->disks)) {
6628 mdname(mddev), rdev0->bdev);
6632 mddev->persistent = 1;
6634 list_del_init(&rdev->same_set);
6645 list_del_init(&rdev->same_set);
6663 return -EFAULT;
6678 if (test_bit(Faulty, &rdev->flags))
6682 if (test_bit(In_sync, &rdev->flags))
6684 else if (test_bit(Journal, &rdev->flags))
6693 info.major_version = mddev->major_version;
6694 info.minor_version = mddev->minor_version;
6696 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6697 info.level = mddev->level;
6698 info.size = mddev->dev_sectors / 2;
6699 if (info.size != mddev->dev_sectors / 2) /* overflow */
6700 info.size = -1;
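/* mdu_array_info_t.size is a 32-bit KiB count, so component sizes of
 * 2 TiB or more cannot be represented and are reported as -1.
 */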
6702 info.raid_disks = mddev->raid_disks;
6703 info.md_minor = mddev->md_minor;
6704 info.not_persistent= !mddev->persistent;
6706 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6708 if (mddev->in_sync)
6710 if (mddev->bitmap && mddev->bitmap_info.offset)
6719 info.layout = mddev->layout;
6720 info.chunk_size = mddev->chunk_sectors << 9;
6723 return -EFAULT;
6736 return -ENOMEM;
6739 spin_lock(&mddev->lock);
6741 if (mddev->bitmap_info.file) {
6742 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6743 sizeof(file->pathname));
6747 memmove(file->pathname, ptr,
6748 sizeof(file->pathname)-(ptr-file->pathname));
6750 spin_unlock(&mddev->lock);
6754 err = -EFAULT;
6766 return -EFAULT;
6771 info.major = MAJOR(rdev->bdev->bd_dev);
6772 info.minor = MINOR(rdev->bdev->bd_dev);
6773 info.raid_disk = rdev->raid_disk;
6775 if (test_bit(Faulty, &rdev->flags))
6777 else if (test_bit(In_sync, &rdev->flags)) {
6781 if (test_bit(Journal, &rdev->flags))
6783 if (test_bit(WriteMostly, &rdev->flags))
6785 if (test_bit(FailFast, &rdev->flags))
6789 info.raid_disk = -1;
6795 return -EFAULT;
6803 dev_t dev = MKDEV(info->major,info->minor);
6806 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6809 return -EINVAL;
6812 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6813 return -EOVERFLOW;
6815 if (!mddev->raid_disks) {
6818 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6824 if (!list_empty(&mddev->disks)) {
6826 = list_entry(mddev->disks.next,
6828 err = super_types[mddev->major_version]
6829 .load_super(rdev, rdev0, mddev->minor_version);
6832 rdev->bdev,
6833 rdev0->bdev);
6835 return -EINVAL;
6849 if (mddev->pers) {
6851 if (!mddev->pers->hot_add_disk) {
6854 return -EINVAL;
6856 if (mddev->persistent)
6857 rdev = md_import_device(dev, mddev->major_version,
6858 mddev->minor_version);
6860 rdev = md_import_device(dev, -1, -1);
6867 if (!mddev->persistent) {
6868 if (info->state & (1<<MD_DISK_SYNC) &&
6869 info->raid_disk < mddev->raid_disks) {
6870 rdev->raid_disk = info->raid_disk;
6871 clear_bit(Bitmap_sync, &rdev->flags);
6873 rdev->raid_disk = -1;
6874 rdev->saved_raid_disk = rdev->raid_disk;
6876 super_types[mddev->major_version].
6878 if ((info->state & (1<<MD_DISK_SYNC)) &&
6879 rdev->raid_disk != info->raid_disk) {
6880 /* This was a hot-add request, but the event count doesn't
6884 return -EINVAL;
6887 clear_bit(In_sync, &rdev->flags); /* just to be sure */
6888 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6889 set_bit(WriteMostly, &rdev->flags);
6891 clear_bit(WriteMostly, &rdev->flags);
6892 if (info->state & (1<<MD_DISK_FAILFAST))
6893 set_bit(FailFast, &rdev->flags);
6895 clear_bit(FailFast, &rdev->flags);
6897 if (info->state & (1<<MD_DISK_JOURNAL)) {
6903 if (test_bit(Journal, &rdev2->flags)) {
6908 if (has_journal || mddev->bitmap) {
6910 return -EBUSY;
6912 set_bit(Journal, &rdev->flags);
6918 if (info->state & (1 << MD_DISK_CANDIDATE))
6919 set_bit(Candidate, &rdev->flags);
6920 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6921 /* --add initiated by this node */
6922 err = md_cluster_ops->add_new_disk(mddev, rdev);
6930 rdev->raid_disk = -1;
6937 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6939 err = md_cluster_ops->new_disk_ack(mddev,
6946 md_cluster_ops->add_new_disk_cancel(mddev);
6960 if (mddev->major_version != 0) {
6962 return -EINVAL;
6965 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6967 rdev = md_import_device(dev, -1, 0);
6973 rdev->desc_nr = info->number;
6974 if (info->raid_disk < mddev->raid_disks)
6975 rdev->raid_disk = info->raid_disk;
6977 rdev->raid_disk = -1;
6979 if (rdev->raid_disk < mddev->raid_disks)
6980 if (info->state & (1<<MD_DISK_SYNC))
6981 set_bit(In_sync, &rdev->flags);
6983 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6984 set_bit(WriteMostly, &rdev->flags);
6985 if (info->state & (1<<MD_DISK_FAILFAST))
6986 set_bit(FailFast, &rdev->flags);
6988 if (!mddev->persistent) {
6990 rdev->sb_start = bdev_nr_sectors(rdev->bdev);
6992 rdev->sb_start = calc_dev_sboffset(rdev);
6993 rdev->sectors = rdev->sb_start;
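/* For v0.90 metadata the superblock sits at the end of the device, so
 * the usable data size equals the superblock offset; non-persistent
 * arrays may use the whole device.
 */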
7009 if (!mddev->pers)
7010 return -ENODEV;
7014 return -ENXIO;
7016 if (rdev->raid_disk < 0)
7019 clear_bit(Blocked, &rdev->flags);
7022 if (rdev->raid_disk >= 0)
7027 if (md_cluster_ops->remove_disk(mddev, rdev))
7032 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7033 if (mddev->thread)
7034 md_wakeup_thread(mddev->thread);
7042 rdev->bdev, mdname(mddev));
7043 return -EBUSY;
7051 if (!mddev->pers)
7052 return -ENODEV;
7054 if (mddev->major_version != 0) {
7055 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
7057 return -EINVAL;
7059 if (!mddev->pers->hot_add_disk) {
7062 return -EINVAL;
7065 rdev = md_import_device(dev, -1, 0);
7069 return -EINVAL;
7072 if (mddev->persistent)
7073 rdev->sb_start = calc_dev_sboffset(rdev);
7075 rdev->sb_start = bdev_nr_sectors(rdev->bdev);
7077 rdev->sectors = rdev->sb_start;
7079 if (test_bit(Faulty, &rdev->flags)) {
7080 pr_warn("md: can not hot-add faulty %pg disk to %s!\n",
7081 rdev->bdev, mdname(mddev));
7082 err = -EINVAL;
7086 clear_bit(In_sync, &rdev->flags);
7087 rdev->desc_nr = -1;
7088 rdev->saved_raid_disk = -1;
7098 rdev->raid_disk = -1;
7100 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7101 if (!mddev->thread)
7107 if (!bdev_nowait(rdev->bdev)) {
7109 mdname(mddev), rdev->bdev);
7110 blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
7116 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7117 md_wakeup_thread(mddev->thread);
7130 if (mddev->pers) {
7131 if (!mddev->pers->quiesce || !mddev->thread)
7132 return -EBUSY;
7133 if (mddev->recovery || mddev->sync_thread)
7134 return -EBUSY;
7142 if (mddev->bitmap || mddev->bitmap_info.file)
7143 return -EEXIST; /* cannot add when bitmap is present */
7148 return -EINVAL;
7158 return -EBADF;
7161 inode = f->f_mapping->host;
7162 if (!S_ISREG(inode->i_mode)) {
7165 err = -EBADF;
7166 } else if (!(f->f_mode & FMODE_WRITE)) {
7169 err = -EBADF;
7170 } else if (atomic_read(&inode->i_writecount) != 1) {
7173 err = -EBUSY;
7179 mddev->bitmap_info.file = f;
7180 mddev->bitmap_info.offset = 0; /* file overrides offset */
7181 } else if (mddev->bitmap == NULL)
7182 return -ENOENT; /* cannot remove what isn't there */
7184 if (mddev->pers) {
7188 bitmap = md_bitmap_create(mddev, -1);
7191 mddev->bitmap = bitmap;
7197 fd = -1;
7207 struct file *f = mddev->bitmap_info.file;
7209 spin_lock(&mddev->lock);
7210 mddev->bitmap_info.file = NULL;
7211 spin_unlock(&mddev->lock);
7225 * This will always create an array with a type-0.90.0 superblock.
7228 * used to determine which style superblocks are to be found on the devices.
7234 if (info->raid_disks == 0) {
7236 if (info->major_version < 0 ||
7237 info->major_version >= ARRAY_SIZE(super_types) ||
7238 super_types[info->major_version].name == NULL) {
7239 /* maybe try to auto-load a module? */
7241 info->major_version);
7242 return -EINVAL;
7244 mddev->major_version = info->major_version;
7245 mddev->minor_version = info->minor_version;
7246 mddev->patch_version = info->patch_version;
7247 mddev->persistent = !info->not_persistent;
7251 mddev->ctime = ktime_get_real_seconds();
7254 mddev->major_version = MD_MAJOR_VERSION;
7255 mddev->minor_version = MD_MINOR_VERSION;
7256 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7257 mddev->ctime = ktime_get_real_seconds();
7259 mddev->level = info->level;
7260 mddev->clevel[0] = 0;
7261 mddev->dev_sectors = 2 * (sector_t)info->size;
7262 mddev->raid_disks = info->raid_disks;
7266 if (info->state & (1<<MD_SB_CLEAN))
7267 mddev->recovery_cp = MaxSector;
7269 mddev->recovery_cp = 0;
7270 mddev->persistent = ! info->not_persistent;
7271 mddev->external = 0;
7273 mddev->layout = info->layout;
7274 if (mddev->level == 0)
7276 mddev->layout = -1;
7277 mddev->chunk_sectors = info->chunk_size >> 9;
7279 if (mddev->persistent) {
7280 mddev->max_disks = MD_SB_DISKS;
7281 mddev->flags = 0;
7282 mddev->sb_flags = 0;
7284 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7286 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7287 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
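/* v0.90 arithmetic: the bitmap starts MD_SB_BYTES (4 KiB = 8 sectors)
 * past sb_start and may use the rest of the reserved 64 KiB region,
 * i.e. 128 - 8 = 120 sectors.
 */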
7288 mddev->bitmap_info.offset = 0;
7290 mddev->reshape_position = MaxSector;
7295 get_random_bytes(mddev->uuid, 16);
7297 mddev->new_level = mddev->level;
7298 mddev->new_chunk_sectors = mddev->chunk_sectors;
7299 mddev->new_layout = mddev->layout;
7300 mddev->delta_disks = 0;
7301 mddev->reshape_backwards = 0;
7308 lockdep_assert_held(&mddev->reconfig_mutex);
7310 if (mddev->external_size)
7313 mddev->array_sectors = array_sectors;
7322 sector_t old_dev_sectors = mddev->dev_sectors;
7324 if (mddev->pers->resize == NULL)
7325 return -EINVAL;
7335 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7336 mddev->sync_thread)
7337 return -EBUSY;
7339 return -EROFS;
7342 sector_t avail = rdev->sectors;
7347 return -ENOSPC;
7349 rv = mddev->pers->resize(mddev, num_sectors);
7352 md_cluster_ops->update_size(mddev, old_dev_sectors);
7353 else if (mddev->queue) {
7354 set_capacity_and_notify(mddev->gendisk,
7355 mddev->array_sectors);
7366 if (mddev->pers->check_reshape == NULL)
7367 return -EINVAL;
7369 return -EROFS;
7371 (mddev->max_disks && raid_disks >= mddev->max_disks))
7372 return -EINVAL;
7373 if (mddev->sync_thread ||
7374 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7375 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7376 mddev->reshape_position != MaxSector)
7377 return -EBUSY;
7380 if (mddev->raid_disks < raid_disks &&
7381 rdev->data_offset < rdev->new_data_offset)
7382 return -EINVAL;
7383 if (mddev->raid_disks > raid_disks &&
7384 rdev->data_offset > rdev->new_data_offset)
7385 return -EINVAL;
7388 mddev->delta_disks = raid_disks - mddev->raid_disks;
7389 if (mddev->delta_disks < 0)
7390 mddev->reshape_backwards = 1;
7391 else if (mddev->delta_disks > 0)
7392 mddev->reshape_backwards = 0;
7394 rv = mddev->pers->check_reshape(mddev);
7396 mddev->delta_disks = 0;
7397 mddev->reshape_backwards = 0;
7404 * on-line array.
7417 if (mddev->bitmap && mddev->bitmap_info.offset)
7420 if (mddev->major_version != info->major_version ||
7421 mddev->minor_version != info->minor_version ||
7422 /* mddev->patch_version != info->patch_version || */
7423 mddev->ctime != info->ctime ||
7424 mddev->level != info->level ||
7425 /* mddev->layout != info->layout || */
7426 mddev->persistent != !info->not_persistent ||
7427 mddev->chunk_sectors != info->chunk_size >> 9 ||
7429 ((state^info->state) & 0xfffffe00)
7431 return -EINVAL;
7433 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7435 if (mddev->raid_disks != info->raid_disks)
7437 if (mddev->layout != info->layout)
7439 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7444 return -EINVAL;
7446 if (mddev->layout != info->layout) {
7451 if (mddev->pers->check_reshape == NULL)
7452 return -EINVAL;
7454 mddev->new_layout = info->layout;
7455 rv = mddev->pers->check_reshape(mddev);
7457 mddev->new_layout = mddev->layout;
7461 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7462 rv = update_size(mddev, (sector_t)info->size * 2);
7464 if (mddev->raid_disks != info->raid_disks)
7465 rv = update_raid_disks(mddev, info->raid_disks);
7467 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7468 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7469 rv = -EINVAL;
7472 if (mddev->recovery || mddev->sync_thread) {
7473 rv = -EBUSY;
7476 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7479 if (mddev->bitmap) {
7480 rv = -EEXIST;
7483 if (mddev->bitmap_info.default_offset == 0) {
7484 rv = -EINVAL;
7487 mddev->bitmap_info.offset =
7488 mddev->bitmap_info.default_offset;
7489 mddev->bitmap_info.space =
7490 mddev->bitmap_info.default_space;
7491 bitmap = md_bitmap_create(mddev, -1);
7494 mddev->bitmap = bitmap;
7503 if (!mddev->bitmap) {
7504 rv = -ENOENT;
7507 if (mddev->bitmap->storage.file) {
7508 rv = -EINVAL;
7511 if (mddev->bitmap_info.nodes) {
7513 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7515 rv = -EPERM;
7516 md_cluster_ops->unlock_all_bitmaps(mddev);
7520 mddev->bitmap_info.nodes = 0;
7521 md_cluster_ops->leave(mddev);
7523 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7528 mddev->bitmap_info.offset = 0;
7542 if (mddev->pers == NULL)
7543 return -ENODEV;
7548 err = -ENODEV;
7551 if (test_bit(MD_BROKEN, &mddev->flags))
7552 err = -EBUSY;
7562 * dosfs just mad... ;-)
7566 struct mddev *mddev = bdev->bd_disk->private_data;
7568 geo->heads = 2;
7569 geo->sectors = 4;
7570 geo->cylinders = mddev->array_sectors / 8;
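/* The fixed 2-head, 4-sector fake geometry gives 8 sectors per
 * cylinder, hence cylinders = array_sectors / 8.
 */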
7606 return -EFAULT;
7608 if (mddev->pers) {
7615 if (!list_empty(&mddev->disks)) {
7617 return -EBUSY;
7620 if (mddev->raid_disks) {
7622 return -EBUSY;
7640 return -ENOTTY;
7649 return -EACCES;
7667 mddev = bdev->bd_disk->private_data;
7672 if (!mddev->raid_disks && !mddev->external)
7673 err = -ENODEV;
7679 if (!mddev->raid_disks && !mddev->external)
7680 err = -ENODEV;
7696 /* Need to flush page cache, and ensure no-one else opens
7699 mutex_lock(&mddev->open_mutex);
7700 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7701 mutex_unlock(&mddev->open_mutex);
7702 err = -EBUSY;
7705 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7706 mutex_unlock(&mddev->open_mutex);
7707 err = -EBUSY;
7710 mutex_unlock(&mddev->open_mutex);
7730 if ((!mddev->raid_disks && !mddev->external)
7734 err = -ENODEV;
7739 * Commands even a read-only array can execute:
7759 /* We can support ADD_NEW_DISK on read-only arrays
7760 * only if we are re-adding a preexisting device.
7761 * So require mddev->pers and MD_DISK_SYNC.
7763 if (mddev->pers) {
7766 err = -EFAULT;
7768 /* Need to clear read-only for this */
7779 * superblock, so we do not allow them on read-only arrays.
7781 if (!md_is_rdwr(mddev) && mddev->pers) {
7782 if (mddev->ro != MD_AUTO_READ) {
7783 err = -EROFS;
7786 mddev->ro = MD_RDWR;
7787 sysfs_notify_dirent_safe(mddev->sysfs_state);
7788 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7790 /* If a device failed while we were read-only, we
7793 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7795 wait_event(mddev->sb_wait,
7796 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7797 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7807 err = -EFAULT;
7815 md_cluster_ops->new_disk_ack(mddev, false);
7817 err = -EINVAL;
7833 err = -EINVAL;
7838 if (mddev->hold_active == UNTIL_IOCTL &&
7839 err != -EINVAL)
7840 mddev->hold_active = 0;
7844 clear_bit(MD_CLOSING, &mddev->flags);
7869 struct mddev *mddev = bdev->bd_disk->private_data;
7876 if (!mddev->raid_disks && !mddev->external) {
7877 err = -ENODEV;
7882 * Transitioning to read-auto need only happen for arrays that call
7885 if (!ro && mddev->ro == MD_RDONLY && mddev->pers) {
7889 mddev->ro = MD_AUTO_READ;
7903 mddev = mddev_get(disk->private_data);
7906 return -ENODEV;
7908 err = mutex_lock_interruptible(&mddev->open_mutex);
7912 err = -ENODEV;
7913 if (test_bit(MD_CLOSING, &mddev->flags))
7916 atomic_inc(&mddev->openers);
7917 mutex_unlock(&mddev->open_mutex);
7923 mutex_unlock(&mddev->open_mutex);
7931 struct mddev *mddev = disk->private_data;
7934 atomic_dec(&mddev->openers);
7940 struct mddev *mddev = disk->private_data;
7943 if (mddev->changed)
7945 mddev->changed = 0;
7951 struct mddev *mddev = disk->private_data;
7953 percpu_ref_exit(&mddev->writes_pending);
7978 * md_thread is a 'system-thread', its priority should be very
7993 * we don't add to the load-average.
8001 (thread->wqueue,
8002 test_bit(THREAD_WAKEUP, &thread->flags)
8004 thread->timeout);
8006 clear_bit(THREAD_WAKEUP, &thread->flags);
8010 thread->run(thread);
8023 wake_up_process(t->tsk);
8034 pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
8035 set_bit(THREAD_WAKEUP, &t->flags);
8036 wake_up(&t->wqueue);
8051 init_waitqueue_head(&thread->wqueue);
8053 thread->run = run;
8054 thread->mddev = mddev;
8055 thread->timeout = MAX_SCHEDULE_TIMEOUT;
8056 thread->tsk = kthread_run(md_thread, thread,
8058 mdname(thread->mddev),
8060 if (IS_ERR(thread->tsk)) {
8071 lockdep_is_held(&mddev->reconfig_mutex));
8079 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
8080 kthread_stop(thread->tsk);
8087 if (!rdev || test_bit(Faulty, &rdev->flags))
8090 if (!mddev->pers || !mddev->pers->error_handler)
8092 mddev->pers->error_handler(mddev, rdev);
8094 if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
8097 if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
8098 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8099 sysfs_notify_dirent_safe(rdev->sysfs_state);
8100 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8101 if (!test_bit(MD_BROKEN, &mddev->flags)) {
8102 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8103 md_wakeup_thread(mddev->thread);
8105 if (mddev->event_work.func)
8106 queue_work(md_misc_wq, &mddev->event_work);
8122 seq_printf(seq, "%pg ", rdev->bdev);
8137 seq_printf(seq, "[%s] ", pers->name);
8151 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8152 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8153 max_sectors = mddev->resync_max_sectors;
8155 max_sectors = mddev->dev_sectors;
8157 resync = mddev->curr_resync;
8159 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8165 res = atomic_read(&mddev->recovery_active);
8171 if (resync < res || resync - res < MD_RESYNC_ACTIVE)
8174 resync -= res;
8178 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8182 if (rdev->raid_disk >= 0 &&
8183 !test_bit(Faulty, &rdev->flags) &&
8184 rdev->recovery_offset != MaxSector &&
8185 rdev->recovery_offset) {
8189 if (mddev->reshape_position != MaxSector)
8195 if (mddev->recovery_cp < MaxSector) {
8222 int i, x = per_milli/50, y = 20-x;
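/* per_milli is progress in units of 0.1%, so each of the 20 bar slots
 * covers 50 units (5%): x '=' marks are drawn, then y '.' placeholders.
 */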
8232 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8234 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8236 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8259 dt = ((jiffies - mddev->resync_mark) / HZ);
8262 curr_mark_cnt = mddev->curr_mark_cnt;
8263 recovery_active = atomic_read(&mddev->recovery_active);
8264 resync_mark_cnt = mddev->resync_mark_cnt;
8267 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8269 rt = max_sectors - resync; /* number of remaining sectors */
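/* db sectors moved in dt seconds gives a rate of db/2/dt KiB/s; the
 * finish estimate is effectively rt * dt / db seconds, computed with
 * shifts to avoid 64-bit division overflow.
 */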
8284 seq->poll_event = atomic_read(&md_event_count);
8308 err = md_bitmap_get_stats(mddev->bitmap, &stats);
8312 chunk_kb = mddev->bitmap_info.chunksize >> 10;
8313 used_pages = stats.pages - stats.missing_pages;
8316 used_pages, stats.pages, used_pages << (PAGE_SHIFT - 10),
8317 chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
8348 mutex_lock(&mddev->bitmap_info.mutex);
8350 spin_lock(&mddev->lock);
8351 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8353 mddev->pers ? "" : "in");
8354 if (mddev->pers) {
8355 if (mddev->ro == MD_RDONLY)
8356 seq_printf(seq, " (read-only)");
8357 if (mddev->ro == MD_AUTO_READ)
8358 seq_printf(seq, " (auto-read-only)");
8359 seq_printf(seq, " %s", mddev->pers->name);
8365 seq_printf(seq, " %pg[%d]", rdev->bdev, rdev->desc_nr);
8367 if (test_bit(WriteMostly, &rdev->flags))
8369 if (test_bit(Journal, &rdev->flags))
8371 if (test_bit(Faulty, &rdev->flags)) {
8375 if (rdev->raid_disk < 0)
8377 if (test_bit(Replacement, &rdev->flags))
8379 sectors += rdev->sectors;
8383 if (!list_empty(&mddev->disks)) {
8384 if (mddev->pers)
8387 mddev->array_sectors / 2);
8392 if (mddev->persistent) {
8393 if (mddev->major_version != 0 ||
8394 mddev->minor_version != 90) {
8396 mddev->major_version,
8397 mddev->minor_version);
8399 } else if (mddev->external)
8401 mddev->metadata_type);
8403 seq_printf(seq, " super non-persistent");
8405 if (mddev->pers) {
8406 mddev->pers->status(seq, mddev);
8408 if (mddev->pers->sync_request) {
8419 spin_unlock(&mddev->lock);
8420 mutex_unlock(&mddev->bitmap_info.mutex);
8446 seq = file->private_data;
8447 seq->poll_event = atomic_read(&md_event_count);
8454 struct seq_file *seq = filp->private_data;
8464 if (seq->poll_event != atomic_read(&md_event_count))
8480 p->name, p->level);
8482 list_add_tail(&p->list, &pers_list);
8490 pr_debug("md: %s personality unregistered\n", p->name);
8492 list_del_init(&p->list);
8504 ret = -EALREADY;
8527 request_module("md-cluster");
8531 pr_warn("can't find md-cluster module or get its reference.\n");
8533 return -ENOENT;
8537 ret = md_cluster_ops->join(mddev, nodes);
8539 mddev->safemode_delay = 0;
8547 md_cluster_ops->leave(mddev);
8560 struct gendisk *disk = rdev->bdev->bd_disk;
8561 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8562 atomic_read(&disk->sync_io);
8568 * non-sync IO will cause disk_stat to increase without
8572 * the array to appear non-idle, and resync will slow
8576 * completing might cause the array to appear non-idle
8578 * not have been non-resync activity. This will only
8585 if (init || curr_events - rdev->last_events > 64) {
8586 rdev->last_events = curr_events;
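/* Up to 64 sectors (32 KiB) of unaccounted IO between checks is
 * tolerated before the array is declared non-idle.
 */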
8597 atomic_sub(blocks, &mddev->recovery_active);
8598 wake_up(&mddev->recovery_wait);
8600 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8601 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8602 md_wakeup_thread(mddev->thread);
8622 BUG_ON(mddev->ro == MD_RDONLY);
8623 if (mddev->ro == MD_AUTO_READ) {
8625 mddev->ro = MD_RDWR;
8626 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8627 md_wakeup_thread(mddev->thread);
8628 md_wakeup_thread(mddev->sync_thread);
8632 percpu_ref_get(&mddev->writes_pending);
8634 if (mddev->safemode == 1)
8635 mddev->safemode = 0;
8636 /* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8637 if (mddev->in_sync || mddev->sync_checkers) {
8638 spin_lock(&mddev->lock);
8639 if (mddev->in_sync) {
8640 mddev->in_sync = 0;
8641 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8642 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8643 md_wakeup_thread(mddev->thread);
8646 spin_unlock(&mddev->lock);
8650 sysfs_notify_dirent_safe(mddev->sysfs_state);
8651 if (!mddev->has_superblocks)
8653 wait_event(mddev->sb_wait,
8654 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8656 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8657 percpu_ref_put(&mddev->writes_pending);
8676 WARN_ON_ONCE(mddev->in_sync || !md_is_rdwr(mddev));
8677 percpu_ref_get(&mddev->writes_pending);
8683 percpu_ref_put(&mddev->writes_pending);
8685 if (mddev->safemode == 2)
8686 md_wakeup_thread(mddev->thread);
8687 else if (mddev->safemode_delay)
8689 * every ->safemode_delay jiffies
8691 mod_timer(&mddev->safemode_timer,
8692 roundup(jiffies, mddev->safemode_delay) +
8693 mddev->safemode_delay);
8704 if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO,
8710 if (mddev->gendisk)
8712 disk_devt(mddev->gendisk),
8713 bio->bi_iter.bi_sector);
8721 if (mddev->pers->bitmap_sector)
8722 mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
8723 &md_io_clone->sectors);
8725 md_bitmap_startwrite(mddev->bitmap, md_io_clone->offset,
8726 md_io_clone->sectors);
8731 md_bitmap_endwrite(mddev->bitmap, md_io_clone->offset,
8732 md_io_clone->sectors);
8737 struct md_io_clone *md_io_clone = bio->bi_private;
8738 struct bio *orig_bio = md_io_clone->orig_bio;
8739 struct mddev *mddev = md_io_clone->mddev;
8741 if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
8744 if (bio->bi_status && !orig_bio->bi_status)
8745 orig_bio->bi_status = bio->bi_status;
8747 if (md_io_clone->start_time)
8748 bio_end_io_acct(orig_bio, md_io_clone->start_time);
8752 percpu_ref_put(&mddev->active_io);
8757 struct block_device *bdev = (*bio)->bi_bdev;
8760 bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
8763 md_io_clone->orig_bio = *bio;
8764 md_io_clone->mddev = mddev;
8765 if (blk_queue_io_stat(bdev->bd_disk->queue))
8766 md_io_clone->start_time = bio_start_io_acct(*bio);
8768 if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
8769 md_io_clone->offset = (*bio)->bi_iter.bi_sector;
8770 md_io_clone->sectors = bio_sectors(*bio);
8774 clone->bi_end_io = md_end_clone_io;
8775 clone->bi_private = md_io_clone;
8781 percpu_ref_get(&mddev->active_io);
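/*
 * The cloning pattern above: the original bio is parked in md_io_clone
 * and the clone is submitted in its place.  md_end_clone_io() (earlier)
 * copies any error status back, finishes the accounting started here,
 * ends the original bio and drops the active_io reference taken here.
 */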
8794 if (!mddev->pers)
8798 if (!mddev->pers->sync_request)
8801 spin_lock(&mddev->lock);
8802 if (mddev->in_sync) {
8803 mddev->in_sync = 0;
8804 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8805 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8806 if (mddev->safemode_delay &&
8807 mddev->safemode == 0)
8808 mddev->safemode = 1;
8809 spin_unlock(&mddev->lock);
8811 sysfs_notify_dirent_safe(mddev->sysfs_state);
8813 wait_event(mddev->sb_wait,
8814 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8816 spin_unlock(&mddev->lock);
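/*
 * This is the md_allow_write() pattern: mark the array dirty up front
 * and wait for the superblock write to complete, so that a caller about
 * to allocate memory or take locks is not later blocked mid-write by a
 * pending metadata update.
 */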
8825 struct mddev *mddev = thread->mddev;
8841 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8842 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8844 if (!md_is_rdwr(mddev)) { /* never try to sync a read-only array */
8845 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8850 ret = md_cluster_ops->resync_start(mddev);
8854 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8855 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8856 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8857 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8858 && ((unsigned long long)mddev->curr_resync_completed
8859 < (unsigned long long)mddev->resync_max_sectors))
8863 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8864 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8865 desc = "data-check";
8867 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8868 desc = "requested-resync";
8872 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8877 mddev->last_sync_action = action ?: desc;
8890 int mddev2_minor = -1;
8891 mddev->curr_resync = MD_RESYNC_DELAYED;
8894 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8898 if (test_bit(MD_DELETED, &mddev2->flags))
8902 if (!mddev->parallel_resync
8903 && mddev2->curr_resync
8907 mddev->curr_resync == MD_RESYNC_DELAYED) {
8909 mddev->curr_resync = MD_RESYNC_YIELDED;
8913 mddev->curr_resync == MD_RESYNC_YIELDED)
8923 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8924 mddev2->curr_resync >= mddev->curr_resync) {
8925 if (mddev2_minor != mddev2->md_minor) {
8926 mddev2_minor = mddev2->md_minor;
8943 } while (mddev->curr_resync < MD_RESYNC_DELAYED);
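/*
 * Only one resync at a time among arrays sharing disks: if another
 * array (mddev2) is already syncing, yield (MD_RESYNC_YIELDED), sleep
 * until it finishes or we are interrupted, and re-check from the top.
 */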
8946 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8950 max_sectors = mddev->resync_max_sectors;
8951 atomic64_set(&mddev->resync_mismatches, 0);
8953 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8954 j = mddev->resync_min;
8955 else if (!mddev->bitmap)
8956 j = mddev->recovery_cp;
8958 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8959 max_sectors = mddev->resync_max_sectors;
8966 mddev->reshape_position != MaxSector)
8967 j = mddev->reshape_position;
8970 max_sectors = mddev->dev_sectors;
8974 if (rdev->raid_disk >= 0 &&
8975 !test_bit(Journal, &rdev->flags) &&
8976 !test_bit(Faulty, &rdev->flags) &&
8977 !test_bit(In_sync, &rdev->flags) &&
8978 rdev->recovery_offset < j)
8979 j = rdev->recovery_offset;
8990 if (mddev->bitmap) {
8991 mddev->pers->quiesce(mddev, 1);
8992 mddev->pers->quiesce(mddev, 0);
9009 mddev->resync_mark = mark[last_mark];
9010 mddev->resync_mark_cnt = mark_cnt[last_mark];
9019 atomic_set(&mddev->recovery_active, 0);
9025 mddev->curr_resync = j;
9027 mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */
9028 mddev->curr_resync_completed = j;
9029 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9039 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9040 ((mddev->curr_resync > mddev->curr_resync_completed &&
9041 (mddev->curr_resync - mddev->curr_resync_completed)
9044 (j - mddev->curr_resync_completed)*2
9045 >= mddev->resync_max - mddev->curr_resync_completed ||
9046 mddev->curr_resync_completed > mddev->resync_max
9049 wait_event(mddev->recovery_wait,
9050 atomic_read(&mddev->recovery_active) == 0);
9051 mddev->curr_resync_completed = j;
9052 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
9053 j > mddev->recovery_cp)
9054 mddev->recovery_cp = j;
9056 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
9057 sysfs_notify_dirent_safe(mddev->sysfs_completed);
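/*
 * Checkpoints are batched: only after enough progress, or on crossing
 * resync_max, do we drain in-flight resync I/O (recovery_active == 0)
 * and publish curr_resync_completed, keeping superblock and sysfs
 * traffic out of the per-chunk hot loop.
 */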
9060 while (j >= mddev->resync_max &&
9061 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9062 /* As this condition is controlled by user-space, we may block indefinitely; only an interruptible wait is safe here.
9067 wait_event_interruptible(mddev->recovery_wait,
9068 mddev->resync_max > j
9070 &mddev->recovery));
9073 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9076 sectors = mddev->pers->sync_request(mddev, j, &skipped);
9078 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9084 atomic_add(sectors, &mddev->recovery_active);
9087 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9095 mddev->curr_resync = j;
9096 mddev->curr_mark_cnt = io_sectors;
9112 mddev->resync_mark = mark[next];
9113 mddev->resync_mark_cnt = mark_cnt[next];
9115 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
9119 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9124 * the 'hard' speed limit, or the system was IO-idle for a jiffy;
9126 * the system might be non-idle CPU-wise, but we only care about IO load.
9132 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
9133 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
9134 / ((jiffies - mddev->resync_mark)/HZ + 1) + 1;
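/*
 * Worked example: sectors are 512 B, so the /2 converts to KiB.  20480
 * sectors of progress over 5 seconds gives 20480/2/5 = 2048 KiB/s
 * (ignoring the +1 terms, which only guard against division by zero and
 * a zero rate right after a mark).  This is the figure compared against
 * the min/max speed limits.
 */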
9146 wait_event(mddev->recovery_wait,
9147 !atomic_read(&mddev->recovery_active));
9152 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
9158 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
9160 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9161 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9162 mddev->curr_resync >= MD_RESYNC_ACTIVE) {
9163 mddev->curr_resync_completed = mddev->curr_resync;
9164 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9166 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9168 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9169 mddev->curr_resync > MD_RESYNC_ACTIVE) {
9170 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9171 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9172 if (mddev->curr_resync >= mddev->recovery_cp) {
9176 &mddev->recovery))
9177 mddev->recovery_cp =
9178 mddev->curr_resync_completed;
9180 mddev->recovery_cp =
9181 mddev->curr_resync;
9184 mddev->recovery_cp = MaxSector;
9186 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9187 mddev->curr_resync = MaxSector;
9188 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9189 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9192 if (rdev->raid_disk >= 0 &&
9193 mddev->delta_disks >= 0 &&
9194 !test_bit(Journal, &rdev->flags) &&
9195 !test_bit(Faulty, &rdev->flags) &&
9196 !test_bit(In_sync, &rdev->flags) &&
9197 rdev->recovery_offset < mddev->curr_resync)
9198 rdev->recovery_offset = mddev->curr_resync;
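/*
 * Record how far each still-rebuilding member got, so an interrupted
 * recovery can resume from recovery_offset instead of from sector 0.
 */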
9207 set_mask_bits(&mddev->sb_flags, 0,
9210 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9211 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9212 mddev->delta_disks > 0 &&
9213 mddev->pers->finish_reshape &&
9214 mddev->pers->size &&
9215 mddev->queue) {
9217 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9220 set_capacity_and_notify(mddev->gendisk,
9221 mddev->array_sectors);
9224 spin_lock(&mddev->lock);
9225 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9227 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9228 mddev->resync_min = 0;
9229 mddev->resync_max = MaxSector;
9230 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9231 mddev->resync_min = mddev->curr_resync_completed;
9232 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9233 mddev->curr_resync = MD_RESYNC_NONE;
9234 spin_unlock(&mddev->lock);
9237 wake_up(&mddev->sb_wait);
9238 md_wakeup_thread(mddev->thread);
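/*
 * md_do_sync() never reaps itself: it sets MD_RECOVERY_DONE above and
 * wakes the array's main thread, which calls md_check_recovery() and
 * md_reap_sync_thread() (both below) to join the thread and finish up.
 */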
9251 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9257 rdev->raid_disk >= 0 &&
9258 !test_bit(Blocked, &rdev->flags) &&
9259 test_bit(Faulty, &rdev->flags) &&
9260 atomic_read(&rdev->nr_pending) == 0) {
9261 /* Faulty non-Blocked devices with nr_pending == 0 never get nr_pending raised again, so one RCU grace period makes them safe to remove.
9267 set_bit(RemoveSynchronized, &rdev->flags);
9275 rdev->raid_disk >= 0 &&
9276 !test_bit(Blocked, &rdev->flags) &&
9277 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9278 (!test_bit(In_sync, &rdev->flags) &&
9279 !test_bit(Journal, &rdev->flags))) &&
9280 atomic_read(&rdev->nr_pending) == 0)) {
9281 if (mddev->pers->hot_remove_disk(
9284 rdev->saved_raid_disk = rdev->raid_disk;
9285 rdev->raid_disk = -1;
9289 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9290 clear_bit(RemoveSynchronized, &rdev->flags);
9293 if (removed && mddev->kobj.sd)
9294 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
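/*
 * Removal is two-phase: Faulty, unblocked, idle devices are first
 * tagged RemoveSynchronized, then after an RCU grace period
 * (synchronize_rcu(), not shown in this excerpt) the ones still idle
 * are detached via ->hot_remove_disk(), and the tag is cleared from
 * any survivors.
 */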
9302 if (test_bit(Candidate, &rdev->flags))
9304 if (rdev->raid_disk >= 0 &&
9305 !test_bit(In_sync, &rdev->flags) &&
9306 !test_bit(Journal, &rdev->flags) &&
9307 !test_bit(Faulty, &rdev->flags))
9309 if (rdev->raid_disk >= 0)
9311 if (test_bit(Faulty, &rdev->flags))
9313 if (!test_bit(Journal, &rdev->flags)) {
9315 !(rdev->saved_raid_disk >= 0 &&
9316 !test_bit(Bitmap_sync, &rdev->flags)))
9319 rdev->recovery_offset = 0;
9321 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9324 if (!test_bit(Journal, &rdev->flags))
9327 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9332 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9340 rcu_assign_pointer(mddev->sync_thread,
9342 if (!mddev->sync_thread) {
9346 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9347 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9348 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9349 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9350 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9353 &mddev->recovery))
9354 if (mddev->sysfs_action)
9355 sysfs_notify_dirent_safe(mddev->sysfs_action);
9357 md_wakeup_thread(mddev->sync_thread);
9358 sysfs_notify_dirent_safe(mddev->sysfs_action);
9363 * This routine is regularly called by all per-raid-array threads to
9364 * deal with generic issues like resync and super-block update.
9371 * "->recovery" and create a thread at ->sync_thread.
9382 * 6/ If array has spares or is not in-sync, start a resync thread.
9386 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9387 /* Write superblock - the thread that called mddev_suspend() holds reconfig_mutex for us.
9390 set_bit(MD_UPDATING_SB, &mddev->flags);
9392 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9394 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9395 wake_up(&mddev->sb_wait);
9401 if (mddev->bitmap)
9405 if (mddev->pers->sync_request && !mddev->external) {
9408 mddev->safemode = 2;
9414 !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9417 (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) ||
9418 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9419 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9420 (mddev->external == 0 && mddev->safemode == 1) ||
9421 (mddev->safemode == 2
9422 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9428 bool try_set_sync = mddev->safemode != 0;
9430 if (!mddev->external && mddev->safemode == 1)
9431 mddev->safemode = 0;
9435 if (!mddev->external && mddev->in_sync)
9442 clear_bit(Blocked, &rdev->flags);
9443 /* On a read-only array we can:
9444 * - remove failed devices
9445 * - add already-in_sync devices if the array itself
9446 * is in-sync.
9447 * As we only add devices that are already in-sync, we can activate them immediately.
9452 * There is no sync thread here, but we still need ->spare_active and to clear saved_raid_disk.
9454 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9456 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9457 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9458 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9468 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9469 rdev->raid_disk < 0)
9474 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9475 spin_lock(&mddev->lock);
9477 spin_unlock(&mddev->lock);
9480 if (mddev->sb_flags)
9487 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
9488 if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9490 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9494 if (WARN_ON_ONCE(!mddev->sync_thread))
9504 mddev->curr_resync_completed = 0;
9505 spin_lock(&mddev->lock);
9506 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9507 spin_unlock(&mddev->lock);
9511 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9512 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9514 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9515 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9520 * Spares are also removed and re-added, to allow
9521 * the personality to fail the re-add.
9524 if (mddev->reshape_position != MaxSector) {
9525 if (mddev->pers->check_reshape == NULL ||
9526 mddev->pers->check_reshape(mddev) != 0)
9529 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9530 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9532 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9533 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9534 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9535 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9536 } else if (mddev->recovery_cp < MaxSector) {
9537 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9538 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9539 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9543 if (mddev->pers->sync_request) {
9549 md_bitmap_write_all(mddev->bitmap);
9551 queue_work(md_misc_wq, &mddev->sync_work);
9555 if (!mddev->sync_thread) {
9556 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9559 &mddev->recovery))
9560 if (mddev->sysfs_action)
9561 sysfs_notify_dirent_safe(mddev->sysfs_action);
9564 wake_up(&mddev->sb_wait);
9573 sector_t old_dev_sectors = mddev->dev_sectors;
9577 md_unregister_thread(mddev, &mddev->sync_thread);
9578 atomic_inc(&mddev->sync_seq);
9580 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9581 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9582 mddev->degraded != mddev->raid_disks) {
9585 if (mddev->pers->spare_active(mddev)) {
9586 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9587 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9590 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9591 mddev->pers->finish_reshape) {
9592 mddev->pers->finish_reshape(mddev);
9597 /* If the array is no longer degraded, any saved_raid_disk information is stale and must be cleared. */
9600 if (!mddev->degraded)
9602 rdev->saved_raid_disk = -1;
9608 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9609 md_cluster_ops->resync_finish(mddev);
9610 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9611 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9612 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9613 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9614 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9615 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9617 * We call md_cluster_ops->update_size here because sync_size could have been changed by md_update_sb; it is now time to propagate the size across the cluster.
9622 && !test_bit(MD_CLOSING, &mddev->flags))
9623 md_cluster_ops->update_size(mddev, old_dev_sectors);
9625 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9626 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9627 sysfs_notify_dirent_safe(mddev->sysfs_action);
9629 if (mddev->event_work.func)
9630 queue_work(md_misc_wq, &mddev->event_work);
9637 sysfs_notify_dirent_safe(rdev->sysfs_state);
9638 wait_event_timeout(rdev->blocked_wait,
9639 !test_bit(Blocked, &rdev->flags) &&
9640 !test_bit(BlockedBadBlocks, &rdev->flags),
9652 if (rdev->data_offset > rdev->new_data_offset)
9653 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9655 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9656 rdev->data_offset = rdev->new_data_offset;
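/*
 * The arithmetic above keeps rdev->sectors in step with the offset
 * move: shifting data_offset down by N sectors gains N (sectors += N),
 * shifting it up costs N (sectors -= N), after which the new offset is
 * committed.
 */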
9667 struct mddev *mddev = rdev->mddev;
9670 s += rdev->new_data_offset;
9672 s += rdev->data_offset;
9673 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9676 if (test_bit(ExternalBbl, &rdev->flags))
9677 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9678 sysfs_notify_dirent_safe(rdev->sysfs_state);
9679 set_mask_bits(&mddev->sb_flags, 0,
9681 md_wakeup_thread(rdev->mddev->thread);
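/*
 * Illustrative caller, not from this file (the function name is
 * invented): 's' is array-relative and is translated by data_offset
 * above.  rdev_set_badblocks() returns 1 once the range is recorded and
 * 0 when the table is full, at which point failing the device is the
 * only safe option left.
 */
static void example_note_media_error(struct md_rdev *rdev,
				     sector_t s, int sectors)
{
	if (!rdev_set_badblocks(rdev, s, sectors, 0))
		md_error(rdev->mddev, rdev);	/* mark the device Faulty */
}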
9693 s += rdev->new_data_offset;
9695 s += rdev->data_offset;
9696 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9697 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9698 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
9715 if (mddev->pers)
9717 if (mddev->persistent)
9718 mddev->safemode = 2;
9754 int ret = -ENOMEM;
9798 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9806 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9807 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9809 pr_info("md-cluster: resize failed\n");
9811 md_bitmap_update_sb(mddev->bitmap);
9816 if (test_bit(Faulty, &rdev2->flags))
9820 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9822 if (test_bit(Candidate, &rdev2->flags)) {
9825 rdev2->bdev);
9830 clear_bit(Candidate, &rdev2->flags);
9833 if (role != rdev2->raid_disk) {
9837 if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
9838 !(le32_to_cpu(sb->feature_map) &
9840 rdev2->saved_raid_disk = role;
9843 rdev2->bdev);
9844 /* wake up mddev->thread here, so the array can perform resync with the newly activated disk
9846 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9847 md_wakeup_thread(mddev->thread);
9857 clear_bit(Blocked, &rdev2->flags);
9862 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9863 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9869 * Since mddev->delta_disks has already been updated in update_raid_disks, it is time to check for a reshape.
9872 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9873 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9878 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9879 if (mddev->pers->update_reshape_pos)
9880 mddev->pers->update_reshape_pos(mddev);
9881 if (mddev->pers->start_reshape)
9882 mddev->pers->start_reshape(mddev);
9883 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9884 mddev->reshape_position != MaxSector &&
9885 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9887 mddev->reshape_position = MaxSector;
9888 if (mddev->pers->update_reshape_pos)
9889 mddev->pers->update_reshape_pos(mddev);
9893 mddev->events = le64_to_cpu(sb->events);
9899 struct page *swapout = rdev->sb_page;
9905 rdev->sb_page = NULL;
9908 ClearPageUptodate(rdev->sb_page);
9909 rdev->sb_loaded = 0;
9910 err = super_types[mddev->major_version].
9911 load_super(rdev, NULL, mddev->minor_version);
9915 __func__, __LINE__, rdev->desc_nr, err);
9916 if (rdev->sb_page)
9917 put_page(rdev->sb_page);
9918 rdev->sb_page = swapout;
9919 rdev->sb_loaded = 1;
9923 sb = page_address(rdev->sb_page);
9928 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9929 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9932 * The other node finished recovery; call spare_active to set the device In_sync and update mddev->degraded.
9934 if (rdev->recovery_offset == MaxSector &&
9935 !test_bit(In_sync, &rdev->flags) &&
9936 mddev->pers->spare_active(mddev))
9937 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
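/*
 * Summary of the page-swap above: the live superblock page is parked in
 * 'swapout', a fresh copy is re-read from disk, and on any load failure
 * the old page is restored, so the rdev never loses valid metadata.
 * This path runs when a cluster peer has updated the superblock under us.
 */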
9950 if (iter->desc_nr == nr) {
9969 if (!test_bit(Faulty, &rdev->flags))
9995 node_detected_dev->dev = dev;
9997 list_add_tail(&node_detected_dev->list, &all_detected_devices);
10019 list_del(&node_detected_dev->list);
10020 dev = node_detected_dev->dev;
10028 if (test_bit(Faulty, &rdev->flags))
10031 set_bit(AutoDetected, &rdev->flags);
10032 list_add(&rdev->same_set, &pending_raid_disks);
10055 * waiting for us in select() or poll() - wake them up
10072 mddev->ctime = 0;
10073 mddev->hold_active = 0;