Lines Matching +full:conf +full:- +full:tx
1 // SPDX-License-Identifier: GPL-2.0-or-later
8 * RAID-4/5/6 management functions.
9 * Thanks to Penguin Computing for making the RAID-6 development possible
22 * conf->seq_write is the number of the last batch successfully written.
23 * conf->seq_flush is the number of the last batch that was closed to
26 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
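The batching these fields implement reduces to a signed sequence comparison: a stripe whose bm_seq is still ahead of conf->seq_write has a bitmap batch that has not reached disk, so do_release_stripe() parks it on conf->bitmap_list instead of handling it. A minimal user-space sketch of that test, assuming plain int counters as in the driver (the helper name is illustrative, not from raid5.c):

#include <stdbool.h>

/* Mirrors the "sh->bm_seq - conf->seq_write > 0" check in do_release_stripe():
 * the signed difference stays meaningful across counter wrap, and a positive
 * result means the stripe's bitmap batch is not yet on disk. */
static bool bitmap_batch_pending(int bm_seq, int seq_write)
{
	return bm_seq - seq_write > 0;
}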
57 #include "md-bitmap.h"
58 #include "raid5-log.h"
71 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) in stripe_hash() argument
73 int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK; in stripe_hash()
74 return &conf->stripe_hashtbl[hash]; in stripe_hash()
77 static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect) in stripe_hash_locks_hash() argument
79 return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK; in stripe_hash_locks_hash()
82 static inline void lock_device_hash_lock(struct r5conf *conf, int hash) in lock_device_hash_lock() argument
84 spin_lock_irq(conf->hash_locks + hash); in lock_device_hash_lock()
85 spin_lock(&conf->device_lock); in lock_device_hash_lock()
88 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) in unlock_device_hash_lock() argument
90 spin_unlock(&conf->device_lock); in unlock_device_hash_lock()
91 spin_unlock_irq(conf->hash_locks + hash); in unlock_device_hash_lock()
94 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) in lock_all_device_hash_locks_irq() argument
97 spin_lock_irq(conf->hash_locks); in lock_all_device_hash_locks_irq()
99 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); in lock_all_device_hash_locks_irq()
100 spin_lock(&conf->device_lock); in lock_all_device_hash_locks_irq()
103 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) in unlock_all_device_hash_locks_irq() argument
106 spin_unlock(&conf->device_lock); in unlock_all_device_hash_locks_irq()
107 for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) in unlock_all_device_hash_locks_irq()
108 spin_unlock(conf->hash_locks + i); in unlock_all_device_hash_locks_irq()
109 spin_unlock_irq(conf->hash_locks); in unlock_all_device_hash_locks_irq()
115 if (sh->ddf_layout) in raid6_d0()
119 if (sh->qd_idx == sh->disks - 1) in raid6_d0()
122 return sh->qd_idx + 1; in raid6_d0()
132 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
133 * is raid_disks-1. This helper does that mapping.
140 if (sh->ddf_layout) in raid6_idx_to_slot()
142 if (idx == sh->pd_idx) in raid6_idx_to_slot()
144 if (idx == sh->qd_idx) in raid6_idx_to_slot()
146 if (!sh->ddf_layout) in raid6_idx_to_slot()
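Restated outside the kernel, the mapping described above is driven by walking the disks from d0 with a shared running count: each data disk claims the next free slot in 0..raid_disks-3 while P and Q are pinned to the last two slots. A hedged sketch of the non-DDF case, with pd_idx, qd_idx and syndrome_disks passed explicitly because there is no stripe_head here:

/* Illustrative restatement of the !ddf_layout branch of raid6_idx_to_slot().
 * Callers iterate the raw disk indices starting at d0 and share *count. */
static int idx_to_slot(int idx, int pd_idx, int qd_idx,
		       int syndrome_disks, int *count)
{
	int slot = *count;

	if (idx == pd_idx)
		return syndrome_disks;		/* P takes the next-to-last slot */
	if (idx == qd_idx)
		return syndrome_disks + 1;	/* Q takes the last slot */
	(*count)++;				/* claim the next data slot */
	return slot;
}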
151 static void print_raid5_conf (struct r5conf *conf);
155 return sh->check_state || sh->reconstruct_state || in stripe_operations_active()
156 test_bit(STRIPE_BIOFILL_RUN, &sh->state) || in stripe_operations_active()
157 test_bit(STRIPE_COMPUTE_RUN, &sh->state); in stripe_operations_active()
162 return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) || in stripe_is_lowprio()
163 test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) && in stripe_is_lowprio()
164 !test_bit(STRIPE_R5C_CACHING, &sh->state); in stripe_is_lowprio()
169 struct r5conf *conf = sh->raid_conf; in raid5_wakeup_stripe_thread() local
172 int i, cpu = sh->cpu; in raid5_wakeup_stripe_thread()
176 sh->cpu = cpu; in raid5_wakeup_stripe_thread()
179 if (list_empty(&sh->lru)) { in raid5_wakeup_stripe_thread()
181 group = conf->worker_groups + cpu_to_group(cpu); in raid5_wakeup_stripe_thread()
183 list_add_tail(&sh->lru, &group->loprio_list); in raid5_wakeup_stripe_thread()
185 list_add_tail(&sh->lru, &group->handle_list); in raid5_wakeup_stripe_thread()
186 group->stripes_cnt++; in raid5_wakeup_stripe_thread()
187 sh->group = group; in raid5_wakeup_stripe_thread()
190 if (conf->worker_cnt_per_group == 0) { in raid5_wakeup_stripe_thread()
191 md_wakeup_thread(conf->mddev->thread); in raid5_wakeup_stripe_thread()
195 group = conf->worker_groups + cpu_to_group(sh->cpu); in raid5_wakeup_stripe_thread()
197 group->workers[0].working = true; in raid5_wakeup_stripe_thread()
199 queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work); in raid5_wakeup_stripe_thread()
201 thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1; in raid5_wakeup_stripe_thread()
203 for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) { in raid5_wakeup_stripe_thread()
204 if (group->workers[i].working == false) { in raid5_wakeup_stripe_thread()
205 group->workers[i].working = true; in raid5_wakeup_stripe_thread()
206 queue_work_on(sh->cpu, raid5_wq, in raid5_wakeup_stripe_thread()
207 &group->workers[i].work); in raid5_wakeup_stripe_thread()
208 thread_cnt--; in raid5_wakeup_stripe_thread()
213 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, in do_release_stripe() argument
219 BUG_ON(!list_empty(&sh->lru)); in do_release_stripe()
220 BUG_ON(atomic_read(&conf->active_stripes)==0); in do_release_stripe()
222 if (r5c_is_writeback(conf->log)) in do_release_stripe()
223 for (i = sh->disks; i--; ) in do_release_stripe()
224 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in do_release_stripe()
233 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || in do_release_stripe()
234 (conf->quiesce && r5c_is_writeback(conf->log) && in do_release_stripe()
235 !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { in do_release_stripe()
236 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in do_release_stripe()
238 set_bit(STRIPE_HANDLE, &sh->state); in do_release_stripe()
241 if (test_bit(STRIPE_HANDLE, &sh->state)) { in do_release_stripe()
242 if (test_bit(STRIPE_DELAYED, &sh->state) && in do_release_stripe()
243 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
244 list_add_tail(&sh->lru, &conf->delayed_list); in do_release_stripe()
245 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && in do_release_stripe()
246 sh->bm_seq - conf->seq_write > 0) in do_release_stripe()
247 list_add_tail(&sh->lru, &conf->bitmap_list); in do_release_stripe()
249 clear_bit(STRIPE_DELAYED, &sh->state); in do_release_stripe()
250 clear_bit(STRIPE_BIT_DELAY, &sh->state); in do_release_stripe()
251 if (conf->worker_cnt_per_group == 0) { in do_release_stripe()
253 list_add_tail(&sh->lru, in do_release_stripe()
254 &conf->loprio_list); in do_release_stripe()
256 list_add_tail(&sh->lru, in do_release_stripe()
257 &conf->handle_list); in do_release_stripe()
263 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
266 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
267 if (atomic_dec_return(&conf->preread_active_stripes) in do_release_stripe()
269 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
270 atomic_dec(&conf->active_stripes); in do_release_stripe()
271 if (!test_bit(STRIPE_EXPANDING, &sh->state)) { in do_release_stripe()
272 if (!r5c_is_writeback(conf->log)) in do_release_stripe()
273 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
275 WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)); in do_release_stripe()
277 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
278 else if (injournal == conf->raid_disks - conf->max_degraded) { in do_release_stripe()
280 if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) in do_release_stripe()
281 atomic_inc(&conf->r5c_cached_full_stripes); in do_release_stripe()
282 if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) in do_release_stripe()
283 atomic_dec(&conf->r5c_cached_partial_stripes); in do_release_stripe()
284 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); in do_release_stripe()
285 r5c_check_cached_full_stripe(conf); in do_release_stripe()
292 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list); in do_release_stripe()
298 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh, in __release_stripe() argument
301 if (atomic_dec_and_test(&sh->count)) in __release_stripe()
302 do_release_stripe(conf, sh, temp_inactive_list); in __release_stripe()
312 static void release_inactive_stripe_list(struct r5conf *conf, in release_inactive_stripe_list() argument
322 hash = NR_STRIPE_HASH_LOCKS - 1; in release_inactive_stripe_list()
326 struct list_head *list = &temp_inactive_list[size - 1]; in release_inactive_stripe_list()
333 spin_lock_irqsave(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
334 if (list_empty(conf->inactive_list + hash) && in release_inactive_stripe_list()
336 atomic_dec(&conf->empty_inactive_list_nr); in release_inactive_stripe_list()
337 list_splice_tail_init(list, conf->inactive_list + hash); in release_inactive_stripe_list()
339 spin_unlock_irqrestore(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
341 size--; in release_inactive_stripe_list()
342 hash--; in release_inactive_stripe_list()
346 wake_up(&conf->wait_for_stripe); in release_inactive_stripe_list()
347 if (atomic_read(&conf->active_stripes) == 0) in release_inactive_stripe_list()
348 wake_up(&conf->wait_for_quiescent); in release_inactive_stripe_list()
349 if (conf->retry_read_aligned) in release_inactive_stripe_list()
350 md_wakeup_thread(conf->mddev->thread); in release_inactive_stripe_list()
354 /* should hold conf->device_lock already */
355 static int release_stripe_list(struct r5conf *conf, in release_stripe_list() argument
362 head = llist_del_all(&conf->released_stripes); in release_stripe_list()
369 clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state); in release_stripe_list()
375 hash = sh->hash_lock_index; in release_stripe_list()
376 __release_stripe(conf, sh, &temp_inactive_list[hash]); in release_stripe_list()
385 struct r5conf *conf = sh->raid_conf; in raid5_release_stripe() local
393 if (atomic_add_unless(&sh->count, -1, 1)) in raid5_release_stripe()
396 if (unlikely(!conf->mddev->thread) || in raid5_release_stripe()
397 test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) in raid5_release_stripe()
399 wakeup = llist_add(&sh->release_list, &conf->released_stripes); in raid5_release_stripe()
401 md_wakeup_thread(conf->mddev->thread); in raid5_release_stripe()
405 if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { in raid5_release_stripe()
407 hash = sh->hash_lock_index; in raid5_release_stripe()
408 do_release_stripe(conf, sh, &list); in raid5_release_stripe()
409 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_release_stripe()
410 release_inactive_stripe_list(conf, &list, hash); in raid5_release_stripe()
417 (unsigned long long)sh->sector); in remove_hash()
419 hlist_del_init(&sh->hash); in remove_hash()
422 static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) in insert_hash() argument
424 struct hlist_head *hp = stripe_hash(conf, sh->sector); in insert_hash()
427 (unsigned long long)sh->sector); in insert_hash()
429 hlist_add_head(&sh->hash, hp); in insert_hash()
433 static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) in get_free_stripe() argument
438 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
440 first = (conf->inactive_list + hash)->next; in get_free_stripe()
444 atomic_inc(&conf->active_stripes); in get_free_stripe()
445 BUG_ON(hash != sh->hash_lock_index); in get_free_stripe()
446 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
447 atomic_inc(&conf->empty_inactive_list_nr); in get_free_stripe()
459 if (!sh->pages) in free_stripe_pages()
462 for (i = 0; i < sh->nr_pages; i++) { in free_stripe_pages()
463 p = sh->pages[i]; in free_stripe_pages()
466 sh->pages[i] = NULL; in free_stripe_pages()
475 for (i = 0; i < sh->nr_pages; i++) { in alloc_stripe_pages()
477 if (sh->pages[i]) in alloc_stripe_pages()
483 return -ENOMEM; in alloc_stripe_pages()
485 sh->pages[i] = p; in alloc_stripe_pages()
491 init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) in init_stripe_shared_pages() argument
495 if (sh->pages) in init_stripe_shared_pages()
498 /* Each of the sh->dev[i] needs one conf->stripe_size */ in init_stripe_shared_pages()
499 cnt = PAGE_SIZE / conf->stripe_size; in init_stripe_shared_pages()
500 nr_pages = (disks + cnt - 1) / cnt; in init_stripe_shared_pages()
502 sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); in init_stripe_shared_pages()
503 if (!sh->pages) in init_stripe_shared_pages()
504 return -ENOMEM; in init_stripe_shared_pages()
505 sh->nr_pages = nr_pages; in init_stripe_shared_pages()
506 sh->stripes_per_page = cnt; in init_stripe_shared_pages()
514 int num = sh->raid_conf->pool_size; in shrink_buffers()
520 WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); in shrink_buffers()
521 p = sh->dev[i].page; in shrink_buffers()
524 sh->dev[i].page = NULL; in shrink_buffers()
529 sh->dev[i].page = NULL; in shrink_buffers()
537 int num = sh->raid_conf->pool_size; in grow_buffers()
546 sh->dev[i].page = page; in grow_buffers()
547 sh->dev[i].orig_page = page; in grow_buffers()
548 sh->dev[i].offset = 0; in grow_buffers()
552 return -ENOMEM; in grow_buffers()
555 sh->dev[i].page = raid5_get_dev_page(sh, i); in grow_buffers()
556 sh->dev[i].orig_page = sh->dev[i].page; in grow_buffers()
557 sh->dev[i].offset = raid5_get_page_offset(sh, i); in grow_buffers()
563 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
568 struct r5conf *conf = sh->raid_conf; in init_stripe() local
571 BUG_ON(atomic_read(&sh->count) != 0); in init_stripe()
572 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); in init_stripe()
574 BUG_ON(sh->batch_head); in init_stripe()
579 seq = read_seqcount_begin(&conf->gen_lock); in init_stripe()
580 sh->generation = conf->generation - previous; in init_stripe()
581 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; in init_stripe()
582 sh->sector = sector; in init_stripe()
583 stripe_set_idx(sector, conf, previous, sh); in init_stripe()
584 sh->state = 0; in init_stripe()
586 for (i = sh->disks; i--; ) { in init_stripe()
587 struct r5dev *dev = &sh->dev[i]; in init_stripe()
589 if (dev->toread || dev->read || dev->towrite || dev->written || in init_stripe()
590 test_bit(R5_LOCKED, &dev->flags)) { in init_stripe()
592 (unsigned long long)sh->sector, i, dev->toread, in init_stripe()
593 dev->read, dev->towrite, dev->written, in init_stripe()
594 test_bit(R5_LOCKED, &dev->flags)); in init_stripe()
597 dev->flags = 0; in init_stripe()
598 dev->sector = raid5_compute_blocknr(sh, i, previous); in init_stripe()
600 if (read_seqcount_retry(&conf->gen_lock, seq)) in init_stripe()
602 sh->overwrite_disks = 0; in init_stripe()
603 insert_hash(conf, sh); in init_stripe()
604 sh->cpu = smp_processor_id(); in init_stripe()
605 set_bit(STRIPE_BATCH_READY, &sh->state); in init_stripe()
608 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, in __find_stripe() argument
614 hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) in __find_stripe()
615 if (sh->sector == sector && sh->generation == generation) in __find_stripe()
623 * - start an array
624 * - remove non-faulty devices
625 * - add a spare
626 * - allow a reshape
631 * of the two sections, and some non-in_sync devices may
634 int raid5_calc_degraded(struct r5conf *conf) in raid5_calc_degraded() argument
641 for (i = 0; i < conf->previous_raid_disks; i++) { in raid5_calc_degraded()
642 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
643 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
644 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
645 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
647 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
650 /* not in-sync or faulty. in raid5_calc_degraded()
657 * be in-sync. in raid5_calc_degraded()
659 if (conf->raid_disks >= conf->previous_raid_disks) in raid5_calc_degraded()
663 if (conf->raid_disks == conf->previous_raid_disks) in raid5_calc_degraded()
667 for (i = 0; i < conf->raid_disks; i++) { in raid5_calc_degraded()
668 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
669 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
670 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
671 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
673 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
676 /* not in-sync or faulty. in raid5_calc_degraded()
681 if (conf->raid_disks <= conf->previous_raid_disks) in raid5_calc_degraded()
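Reduced to its shape, the calculation above counts unusable members once against the pre-reshape geometry and once against the post-reshape geometry and reports the worse of the two. The RCU rdev walk and the refinement that a recovering device may still count as in-sync while the array is being grown are omitted in this sketch; the disk_state enum and function names are illustrative:

enum disk_state { DISK_OK, DISK_MISSING, DISK_FAULTY, DISK_RECOVERING };

/* Count members that cannot be trusted under one geometry. */
static int count_degraded(const enum disk_state *disks, int nr)
{
	int i, degraded = 0;

	for (i = 0; i < nr; i++)
		if (disks[i] != DISK_OK)
			degraded++;
	return degraded;
}

/* While a reshape runs both geometries are live, so the array is only as
 * healthy as its weaker half -- the same max that raid5_calc_degraded()
 * finishes with. */
static int calc_degraded(const enum disk_state *disks,
			 int previous_raid_disks, int raid_disks)
{
	int before = count_degraded(disks, previous_raid_disks);
	int after = count_degraded(disks, raid_disks);

	return before > after ? before : after;
}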
690 static bool has_failed(struct r5conf *conf) in has_failed() argument
692 int degraded = conf->mddev->degraded; in has_failed()
694 if (test_bit(MD_BROKEN, &conf->mddev->flags)) in has_failed()
697 if (conf->mddev->reshape_position != MaxSector) in has_failed()
698 degraded = raid5_calc_degraded(conf); in has_failed()
700 return degraded > conf->max_degraded; in has_failed()
704 raid5_get_active_stripe(struct r5conf *conf, sector_t sector, in raid5_get_active_stripe() argument
708 int hash = stripe_hash_locks_hash(conf, sector); in raid5_get_active_stripe()
713 spin_lock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
716 wait_event_lock_irq(conf->wait_for_quiescent, in raid5_get_active_stripe()
717 conf->quiesce == 0 || noquiesce, in raid5_get_active_stripe()
718 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
719 sh = __find_stripe(conf, sector, conf->generation - previous); in raid5_get_active_stripe()
721 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { in raid5_get_active_stripe()
722 sh = get_free_stripe(conf, hash); in raid5_get_active_stripe()
724 &conf->cache_state)) in raid5_get_active_stripe()
726 &conf->cache_state); in raid5_get_active_stripe()
731 r5c_check_stripe_cache_usage(conf); in raid5_get_active_stripe()
734 &conf->cache_state); in raid5_get_active_stripe()
735 r5l_wake_reclaim(conf->log, 0); in raid5_get_active_stripe()
737 conf->wait_for_stripe, in raid5_get_active_stripe()
738 !list_empty(conf->inactive_list + hash) && in raid5_get_active_stripe()
739 (atomic_read(&conf->active_stripes) in raid5_get_active_stripe()
740 < (conf->max_nr_stripes * 3 / 4) in raid5_get_active_stripe()
742 &conf->cache_state)), in raid5_get_active_stripe()
743 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
745 &conf->cache_state); in raid5_get_active_stripe()
748 atomic_inc(&sh->count); in raid5_get_active_stripe()
750 } else if (!atomic_inc_not_zero(&sh->count)) { in raid5_get_active_stripe()
751 spin_lock(&conf->device_lock); in raid5_get_active_stripe()
752 if (!atomic_read(&sh->count)) { in raid5_get_active_stripe()
753 if (!test_bit(STRIPE_HANDLE, &sh->state)) in raid5_get_active_stripe()
754 atomic_inc(&conf->active_stripes); in raid5_get_active_stripe()
755 BUG_ON(list_empty(&sh->lru) && in raid5_get_active_stripe()
756 !test_bit(STRIPE_EXPANDING, &sh->state)); in raid5_get_active_stripe()
758 if (!list_empty(conf->inactive_list + hash)) in raid5_get_active_stripe()
760 list_del_init(&sh->lru); in raid5_get_active_stripe()
761 if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) in raid5_get_active_stripe()
762 atomic_inc(&conf->empty_inactive_list_nr); in raid5_get_active_stripe()
763 if (sh->group) { in raid5_get_active_stripe()
764 sh->group->stripes_cnt--; in raid5_get_active_stripe()
765 sh->group = NULL; in raid5_get_active_stripe()
768 atomic_inc(&sh->count); in raid5_get_active_stripe()
769 spin_unlock(&conf->device_lock); in raid5_get_active_stripe()
773 spin_unlock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
779 BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded)); in is_full_stripe_write()
780 return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded); in is_full_stripe_write()
784 __acquires(&sh1->stripe_lock) in lock_two_stripes()
785 __acquires(&sh2->stripe_lock) in lock_two_stripes()
788 spin_lock_irq(&sh2->stripe_lock); in lock_two_stripes()
789 spin_lock_nested(&sh1->stripe_lock, 1); in lock_two_stripes()
791 spin_lock_irq(&sh1->stripe_lock); in lock_two_stripes()
792 spin_lock_nested(&sh2->stripe_lock, 1); in lock_two_stripes()
797 __releases(&sh1->stripe_lock) in unlock_two_stripes()
798 __releases(&sh2->stripe_lock) in unlock_two_stripes()
800 spin_unlock(&sh1->stripe_lock); in unlock_two_stripes()
801 spin_unlock_irq(&sh2->stripe_lock); in unlock_two_stripes()
807 struct r5conf *conf = sh->raid_conf; in stripe_can_batch() local
809 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in stripe_can_batch()
811 return test_bit(STRIPE_BATCH_READY, &sh->state) && in stripe_can_batch()
812 !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && in stripe_can_batch()
817 static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh) in stripe_add_to_batch_list() argument
826 tmp_sec = sh->sector; in stripe_add_to_batch_list()
827 if (!sector_div(tmp_sec, conf->chunk_sectors)) in stripe_add_to_batch_list()
829 head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf); in stripe_add_to_batch_list()
831 hash = stripe_hash_locks_hash(conf, head_sector); in stripe_add_to_batch_list()
832 spin_lock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
833 head = __find_stripe(conf, head_sector, conf->generation); in stripe_add_to_batch_list()
834 if (head && !atomic_inc_not_zero(&head->count)) { in stripe_add_to_batch_list()
835 spin_lock(&conf->device_lock); in stripe_add_to_batch_list()
836 if (!atomic_read(&head->count)) { in stripe_add_to_batch_list()
837 if (!test_bit(STRIPE_HANDLE, &head->state)) in stripe_add_to_batch_list()
838 atomic_inc(&conf->active_stripes); in stripe_add_to_batch_list()
839 BUG_ON(list_empty(&head->lru) && in stripe_add_to_batch_list()
840 !test_bit(STRIPE_EXPANDING, &head->state)); in stripe_add_to_batch_list()
842 if (!list_empty(conf->inactive_list + hash)) in stripe_add_to_batch_list()
844 list_del_init(&head->lru); in stripe_add_to_batch_list()
845 if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) in stripe_add_to_batch_list()
846 atomic_inc(&conf->empty_inactive_list_nr); in stripe_add_to_batch_list()
847 if (head->group) { in stripe_add_to_batch_list()
848 head->group->stripes_cnt--; in stripe_add_to_batch_list()
849 head->group = NULL; in stripe_add_to_batch_list()
852 atomic_inc(&head->count); in stripe_add_to_batch_list()
853 spin_unlock(&conf->device_lock); in stripe_add_to_batch_list()
855 spin_unlock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
867 if (sh->batch_head) in stripe_add_to_batch_list()
871 while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in stripe_add_to_batch_list()
873 if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf || in stripe_add_to_batch_list()
874 bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) in stripe_add_to_batch_list()
877 if (head->batch_head) { in stripe_add_to_batch_list()
878 spin_lock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
881 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
888 * this stripe->batch_head doesn't get assigned, which in stripe_add_to_batch_list()
891 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
897 list_add(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
898 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
900 head->batch_head = head; in stripe_add_to_batch_list()
901 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
902 spin_lock(&head->batch_lock); in stripe_add_to_batch_list()
903 list_add_tail(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
904 spin_unlock(&head->batch_lock); in stripe_add_to_batch_list()
907 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in stripe_add_to_batch_list()
908 if (atomic_dec_return(&conf->preread_active_stripes) in stripe_add_to_batch_list()
910 md_wakeup_thread(conf->mddev->thread); in stripe_add_to_batch_list()
912 if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { in stripe_add_to_batch_list()
913 int seq = sh->bm_seq; in stripe_add_to_batch_list()
914 if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && in stripe_add_to_batch_list()
915 sh->batch_head->bm_seq > seq) in stripe_add_to_batch_list()
916 seq = sh->batch_head->bm_seq; in stripe_add_to_batch_list()
917 set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); in stripe_add_to_batch_list()
918 sh->batch_head->bm_seq = seq; in stripe_add_to_batch_list()
921 atomic_inc(&sh->count); in stripe_add_to_batch_list()
931 static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) in use_new_offset() argument
933 sector_t progress = conf->reshape_progress; in use_new_offset()
935 * of conf->generation, or ->data_offset that was set before in use_new_offset()
941 if (sh->generation == conf->generation - 1) in use_new_offset()
943 /* We are in a reshape, and this is a new-generation stripe, in use_new_offset()
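Pieced together from the fragments above, the decision is: no reshape in progress, or a stripe created under the previous generation, keeps the old data_offset; anything else was laid out under the new geometry and must use new_data_offset. A simplified restatement that drops the smp_rmb() pairing (MAX_SECTOR stands in for the kernel's MaxSector sentinel):

/* Returns 1 when the stripe should be addressed via new_data_offset,
 * 0 when the old data_offset still applies. */
static int uses_new_offset(unsigned long long reshape_progress,
			   unsigned long long max_sector,
			   int stripe_generation, int conf_generation)
{
	if (reshape_progress == max_sector)
		return 0;			/* no reshape running */
	if (stripe_generation == conf_generation - 1)
		return 0;			/* stripe predates the reshape */
	return 1;				/* new-generation stripe */
}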
964 if (da->sector > db->sector) in cmp_stripe()
966 if (da->sector < db->sector) in cmp_stripe()
967 return -1; in cmp_stripe()
971 static void dispatch_defer_bios(struct r5conf *conf, int target, in dispatch_defer_bios() argument
978 if (conf->pending_data_cnt == 0) in dispatch_defer_bios()
981 list_sort(NULL, &conf->pending_list, cmp_stripe); in dispatch_defer_bios()
983 first = conf->pending_list.next; in dispatch_defer_bios()
986 if (conf->next_pending_data) in dispatch_defer_bios()
987 list_move_tail(&conf->pending_list, in dispatch_defer_bios()
988 &conf->next_pending_data->sibling); in dispatch_defer_bios()
990 while (!list_empty(&conf->pending_list)) { in dispatch_defer_bios()
991 data = list_first_entry(&conf->pending_list, in dispatch_defer_bios()
993 if (&data->sibling == first) in dispatch_defer_bios()
994 first = data->sibling.next; in dispatch_defer_bios()
995 next = data->sibling.next; in dispatch_defer_bios()
997 bio_list_merge(list, &data->bios); in dispatch_defer_bios()
998 list_move(&data->sibling, &conf->free_list); in dispatch_defer_bios()
1003 conf->pending_data_cnt -= cnt; in dispatch_defer_bios()
1004 BUG_ON(conf->pending_data_cnt < 0 || cnt < target); in dispatch_defer_bios()
1006 if (next != &conf->pending_list) in dispatch_defer_bios()
1007 conf->next_pending_data = list_entry(next, in dispatch_defer_bios()
1010 conf->next_pending_data = NULL; in dispatch_defer_bios()
1012 if (first != &conf->pending_list) in dispatch_defer_bios()
1013 list_move_tail(&conf->pending_list, first); in dispatch_defer_bios()
1016 static void flush_deferred_bios(struct r5conf *conf) in flush_deferred_bios() argument
1020 if (conf->pending_data_cnt == 0) in flush_deferred_bios()
1023 spin_lock(&conf->pending_bios_lock); in flush_deferred_bios()
1024 dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp); in flush_deferred_bios()
1025 BUG_ON(conf->pending_data_cnt != 0); in flush_deferred_bios()
1026 spin_unlock(&conf->pending_bios_lock); in flush_deferred_bios()
1031 static void defer_issue_bios(struct r5conf *conf, sector_t sector, in defer_issue_bios() argument
1037 spin_lock(&conf->pending_bios_lock); in defer_issue_bios()
1038 ent = list_first_entry(&conf->free_list, struct r5pending_data, in defer_issue_bios()
1040 list_move_tail(&ent->sibling, &conf->pending_list); in defer_issue_bios()
1041 ent->sector = sector; in defer_issue_bios()
1042 bio_list_init(&ent->bios); in defer_issue_bios()
1043 bio_list_merge(&ent->bios, bios); in defer_issue_bios()
1044 conf->pending_data_cnt++; in defer_issue_bios()
1045 if (conf->pending_data_cnt >= PENDING_IO_MAX) in defer_issue_bios()
1046 dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp); in defer_issue_bios()
1048 spin_unlock(&conf->pending_bios_lock); in defer_issue_bios()
1060 struct r5conf *conf = sh->raid_conf; in ops_run_io() local
1061 int i, disks = sh->disks; in ops_run_io()
1071 should_defer = conf->batch_bio_dispatch && conf->group_cnt; in ops_run_io()
1073 for (i = disks; i--; ) { in ops_run_io()
1080 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { in ops_run_io()
1082 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) in ops_run_io()
1084 if (test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_io()
1086 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) in ops_run_io()
1089 &sh->dev[i].flags)) { in ops_run_io()
1094 if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags)) in ops_run_io()
1098 bi = &sh->dev[i].req; in ops_run_io()
1099 rbi = &sh->dev[i].rreq; /* For writing to replacement */ in ops_run_io()
1102 rrdev = rcu_dereference(conf->disks[i].replacement); in ops_run_io()
1104 rdev = rcu_dereference(conf->disks[i].rdev); in ops_run_io()
1116 if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev) in ops_run_io()
1121 if (rdev && test_bit(Faulty, &rdev->flags)) in ops_run_io()
1124 atomic_inc(&rdev->nr_pending); in ops_run_io()
1125 if (rrdev && test_bit(Faulty, &rrdev->flags)) in ops_run_io()
1128 atomic_inc(&rrdev->nr_pending); in ops_run_io()
1136 test_bit(WriteErrorSeen, &rdev->flags)) { in ops_run_io()
1139 int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in ops_run_io()
1145 set_bit(BlockedBadBlocks, &rdev->flags); in ops_run_io()
1146 if (!conf->mddev->external && in ops_run_io()
1147 conf->mddev->sb_flags) { in ops_run_io()
1150 * bad block log - better give it in ops_run_io()
1152 md_check_recovery(conf->mddev); in ops_run_io()
1159 atomic_inc(&rdev->nr_pending); in ops_run_io()
1160 md_wait_for_blocked_rdev(rdev, conf->mddev); in ops_run_io()
1162 /* Acknowledged bad block - skip the write */ in ops_run_io()
1163 rdev_dec_pending(rdev, conf->mddev); in ops_run_io()
1169 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1170 || s->replacing) in ops_run_io()
1171 md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1173 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1175 bio_set_dev(bi, rdev->bdev); in ops_run_io()
1177 bi->bi_end_io = op_is_write(op) in ops_run_io()
1180 bi->bi_private = sh; in ops_run_io()
1183 __func__, (unsigned long long)sh->sector, in ops_run_io()
1184 bi->bi_opf, i); in ops_run_io()
1185 atomic_inc(&sh->count); in ops_run_io()
1187 atomic_inc(&head_sh->count); in ops_run_io()
1188 if (use_new_offset(conf, sh)) in ops_run_io()
1189 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1190 + rdev->new_data_offset); in ops_run_io()
1192 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1193 + rdev->data_offset); in ops_run_io()
1194 if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags)) in ops_run_io()
1195 bi->bi_opf |= REQ_NOMERGE; in ops_run_io()
1197 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1198 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1201 test_bit(R5_InJournal, &sh->dev[i].flags)) in ops_run_io()
1207 sh->dev[i].vec.bv_page = sh->dev[i].orig_page; in ops_run_io()
1209 sh->dev[i].vec.bv_page = sh->dev[i].page; in ops_run_io()
1210 bi->bi_vcnt = 1; in ops_run_io()
1211 bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1212 bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1213 bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1214 bi->bi_write_hint = sh->dev[i].write_hint; in ops_run_io()
1216 sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; in ops_run_io()
1222 bi->bi_vcnt = 0; in ops_run_io()
1224 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); in ops_run_io()
1226 if (conf->mddev->gendisk) in ops_run_io()
1227 trace_block_bio_remap(bi->bi_disk->queue, in ops_run_io()
1228 bi, disk_devt(conf->mddev->gendisk), in ops_run_io()
1229 sh->dev[i].sector); in ops_run_io()
1236 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1237 || s->replacing) in ops_run_io()
1238 md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1240 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1242 bio_set_dev(rbi, rrdev->bdev); in ops_run_io()
1245 rbi->bi_end_io = raid5_end_write_request; in ops_run_io()
1246 rbi->bi_private = sh; in ops_run_io()
1250 __func__, (unsigned long long)sh->sector, in ops_run_io()
1251 rbi->bi_opf, i); in ops_run_io()
1252 atomic_inc(&sh->count); in ops_run_io()
1254 atomic_inc(&head_sh->count); in ops_run_io()
1255 if (use_new_offset(conf, sh)) in ops_run_io()
1256 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1257 + rrdev->new_data_offset); in ops_run_io()
1259 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1260 + rrdev->data_offset); in ops_run_io()
1261 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1262 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1263 sh->dev[i].rvec.bv_page = sh->dev[i].page; in ops_run_io()
1264 rbi->bi_vcnt = 1; in ops_run_io()
1265 rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1266 rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1267 rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1268 rbi->bi_write_hint = sh->dev[i].write_hint; in ops_run_io()
1269 sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; in ops_run_io()
1275 rbi->bi_vcnt = 0; in ops_run_io()
1276 if (conf->mddev->gendisk) in ops_run_io()
1277 trace_block_bio_remap(rbi->bi_disk->queue, in ops_run_io()
1278 rbi, disk_devt(conf->mddev->gendisk), in ops_run_io()
1279 sh->dev[i].sector); in ops_run_io()
1287 set_bit(STRIPE_DEGRADED, &sh->state); in ops_run_io()
1289 bi->bi_opf, i, (unsigned long long)sh->sector); in ops_run_io()
1290 clear_bit(R5_LOCKED, &sh->dev[i].flags); in ops_run_io()
1291 set_bit(STRIPE_HANDLE, &sh->state); in ops_run_io()
1294 if (!head_sh->batch_head) in ops_run_io()
1296 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_io()
1303 defer_issue_bios(conf, head_sh->sector, &pending_bios); in ops_run_io()
1308 unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, in async_copy_data() argument
1317 struct r5conf *conf = sh->raid_conf; in async_copy_data() local
1319 if (bio->bi_iter.bi_sector >= sector) in async_copy_data()
1320 page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512; in async_copy_data()
1322 page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512; in async_copy_data()
1326 init_async_submit(&submit, flags, tx, NULL, NULL, NULL); in async_copy_data()
1334 b_offset = -page_offset; in async_copy_data()
1336 len -= b_offset; in async_copy_data()
1339 if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf)) in async_copy_data()
1340 clen = RAID5_STRIPE_SIZE(conf) - page_offset; in async_copy_data()
1348 if (conf->skip_copy && in async_copy_data()
1350 clen == RAID5_STRIPE_SIZE(conf) && in async_copy_data()
1354 tx = async_memcpy(*page, bio_page, page_offset + poff, in async_copy_data()
1357 tx = async_memcpy(bio_page, *page, b_offset, in async_copy_data()
1361 submit.depend_tx = tx; in async_copy_data()
1368 return tx; in async_copy_data()
1375 struct r5conf *conf = sh->raid_conf; in ops_complete_biofill() local
1378 (unsigned long long)sh->sector); in ops_complete_biofill()
1381 for (i = sh->disks; i--; ) { in ops_complete_biofill()
1382 struct r5dev *dev = &sh->dev[i]; in ops_complete_biofill()
1389 if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { in ops_complete_biofill()
1392 BUG_ON(!dev->read); in ops_complete_biofill()
1393 rbi = dev->read; in ops_complete_biofill()
1394 dev->read = NULL; in ops_complete_biofill()
1395 while (rbi && rbi->bi_iter.bi_sector < in ops_complete_biofill()
1396 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_complete_biofill()
1397 rbi2 = r5_next_bio(conf, rbi, dev->sector); in ops_complete_biofill()
1403 clear_bit(STRIPE_BIOFILL_RUN, &sh->state); in ops_complete_biofill()
1405 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_biofill()
1411 struct dma_async_tx_descriptor *tx = NULL; in ops_run_biofill() local
1414 struct r5conf *conf = sh->raid_conf; in ops_run_biofill() local
1416 BUG_ON(sh->batch_head); in ops_run_biofill()
1418 (unsigned long long)sh->sector); in ops_run_biofill()
1420 for (i = sh->disks; i--; ) { in ops_run_biofill()
1421 struct r5dev *dev = &sh->dev[i]; in ops_run_biofill()
1422 if (test_bit(R5_Wantfill, &dev->flags)) { in ops_run_biofill()
1424 spin_lock_irq(&sh->stripe_lock); in ops_run_biofill()
1425 dev->read = rbi = dev->toread; in ops_run_biofill()
1426 dev->toread = NULL; in ops_run_biofill()
1427 spin_unlock_irq(&sh->stripe_lock); in ops_run_biofill()
1428 while (rbi && rbi->bi_iter.bi_sector < in ops_run_biofill()
1429 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biofill()
1430 tx = async_copy_data(0, rbi, &dev->page, in ops_run_biofill()
1431 dev->offset, in ops_run_biofill()
1432 dev->sector, tx, sh, 0); in ops_run_biofill()
1433 rbi = r5_next_bio(conf, rbi, dev->sector); in ops_run_biofill()
1438 atomic_inc(&sh->count); in ops_run_biofill()
1439 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); in ops_run_biofill()
1450 tgt = &sh->dev[target]; in mark_target_uptodate()
1451 set_bit(R5_UPTODATE, &tgt->flags); in mark_target_uptodate()
1452 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in mark_target_uptodate()
1453 clear_bit(R5_Wantcompute, &tgt->flags); in mark_target_uptodate()
1461 (unsigned long long)sh->sector); in ops_complete_compute()
1464 mark_target_uptodate(sh, sh->ops.target); in ops_complete_compute()
1465 mark_target_uptodate(sh, sh->ops.target2); in ops_complete_compute()
1467 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); in ops_complete_compute()
1468 if (sh->check_state == check_state_compute_run) in ops_complete_compute()
1469 sh->check_state = check_state_compute_result; in ops_complete_compute()
1470 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_compute()
1477 return percpu->scribble + i * percpu->scribble_obj_size; in to_addr_page()
1484 return (void *) (to_addr_page(percpu, i) + sh->disks + 2); in to_addr_conv()
1493 return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); in to_addr_offs()
1499 int disks = sh->disks; in ops_run_compute5()
1502 int target = sh->ops.target; in ops_run_compute5()
1503 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute5()
1504 struct page *xor_dest = tgt->page; in ops_run_compute5()
1505 unsigned int off_dest = tgt->offset; in ops_run_compute5()
1507 struct dma_async_tx_descriptor *tx; in ops_run_compute5() local
1511 BUG_ON(sh->batch_head); in ops_run_compute5()
1514 __func__, (unsigned long long)sh->sector, target); in ops_run_compute5()
1515 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute5()
1517 for (i = disks; i--; ) { in ops_run_compute5()
1519 off_srcs[count] = sh->dev[i].offset; in ops_run_compute5()
1520 xor_srcs[count++] = sh->dev[i].page; in ops_run_compute5()
1524 atomic_inc(&sh->count); in ops_run_compute5()
1529 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_compute5()
1530 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1532 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_compute5()
1533 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1535 return tx; in ops_run_compute5()
1538 /* set_syndrome_sources - populate source buffers for gen_syndrome
1539 * @srcs - (struct page *) array of size sh->disks
1540 * @offs - (unsigned int) array of offset for each page
1541 * @sh - stripe_head to parse
1553 int disks = sh->disks; in set_syndrome_sources()
1554 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); in set_syndrome_sources()
1566 struct r5dev *dev = &sh->dev[i]; in set_syndrome_sources()
1568 if (i == sh->qd_idx || i == sh->pd_idx || in set_syndrome_sources()
1571 (test_bit(R5_Wantdrain, &dev->flags) || in set_syndrome_sources()
1572 test_bit(R5_InJournal, &dev->flags))) || in set_syndrome_sources()
1574 (dev->written || in set_syndrome_sources()
1575 test_bit(R5_InJournal, &dev->flags)))) { in set_syndrome_sources()
1576 if (test_bit(R5_InJournal, &dev->flags)) in set_syndrome_sources()
1577 srcs[slot] = sh->dev[i].orig_page; in set_syndrome_sources()
1579 srcs[slot] = sh->dev[i].page; in set_syndrome_sources()
1585 offs[slot] = sh->dev[i].offset; in set_syndrome_sources()
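The arrays being filled here follow the layout async_gen_syndrome() consumes: data pages in slots 0..syndrome_disks-1, P at slot syndrome_disks, Q in the slot after it, with a NULL entry standing for an all-zero block. A hedged sketch of a filler for that layout, with struct page left opaque and the helper name invented for illustration:

struct page;	/* opaque stand-in for the kernel's struct page */

/* Lay sources out as async_gen_syndrome() expects: [0..n-1] data, [n] P,
 * [n+1] Q, each with a matching byte offset. */
static void fill_syndrome_sources(struct page **srcs, unsigned int *offs,
				  struct page * const *data,
				  const unsigned int *data_offs,
				  int syndrome_disks,
				  struct page *p, unsigned int p_off,
				  struct page *q, unsigned int q_off)
{
	int i;

	for (i = 0; i < syndrome_disks; i++) {
		srcs[i] = data[i];		/* may legitimately be NULL */
		offs[i] = data_offs[i];
	}
	srcs[syndrome_disks] = p;
	offs[syndrome_disks] = p_off;
	srcs[syndrome_disks + 1] = q;
	offs[syndrome_disks + 1] = q_off;
}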
1596 int disks = sh->disks; in ops_run_compute6_1()
1600 int qd_idx = sh->qd_idx; in ops_run_compute6_1()
1601 struct dma_async_tx_descriptor *tx; in ops_run_compute6_1() local
1609 BUG_ON(sh->batch_head); in ops_run_compute6_1()
1610 if (sh->ops.target < 0) in ops_run_compute6_1()
1611 target = sh->ops.target2; in ops_run_compute6_1()
1612 else if (sh->ops.target2 < 0) in ops_run_compute6_1()
1613 target = sh->ops.target; in ops_run_compute6_1()
1619 __func__, (unsigned long long)sh->sector, target); in ops_run_compute6_1()
1621 tgt = &sh->dev[target]; in ops_run_compute6_1()
1622 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_1()
1623 dest = tgt->page; in ops_run_compute6_1()
1624 dest_off = tgt->offset; in ops_run_compute6_1()
1626 atomic_inc(&sh->count); in ops_run_compute6_1()
1635 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_compute6_1()
1636 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1638 /* Compute any data- or p-drive using XOR */ in ops_run_compute6_1()
1640 for (i = disks; i-- ; ) { in ops_run_compute6_1()
1643 offs[count] = sh->dev[i].offset; in ops_run_compute6_1()
1644 blocks[count++] = sh->dev[i].page; in ops_run_compute6_1()
1650 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_1()
1651 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1654 return tx; in ops_run_compute6_1()
1660 int i, count, disks = sh->disks; in ops_run_compute6_2()
1661 int syndrome_disks = sh->ddf_layout ? disks : disks-2; in ops_run_compute6_2()
1663 int faila = -1, failb = -1; in ops_run_compute6_2()
1664 int target = sh->ops.target; in ops_run_compute6_2()
1665 int target2 = sh->ops.target2; in ops_run_compute6_2()
1666 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute6_2()
1667 struct r5dev *tgt2 = &sh->dev[target2]; in ops_run_compute6_2()
1668 struct dma_async_tx_descriptor *tx; in ops_run_compute6_2() local
1673 BUG_ON(sh->batch_head); in ops_run_compute6_2()
1675 __func__, (unsigned long long)sh->sector, target, target2); in ops_run_compute6_2()
1677 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_2()
1678 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); in ops_run_compute6_2()
1680 /* we need to open-code set_syndrome_sources to handle the in ops_run_compute6_2()
1692 offs[slot] = sh->dev[i].offset; in ops_run_compute6_2()
1693 blocks[slot] = sh->dev[i].page; in ops_run_compute6_2()
1706 __func__, (unsigned long long)sh->sector, faila, failb); in ops_run_compute6_2()
1708 atomic_inc(&sh->count); in ops_run_compute6_2()
1718 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1724 int qd_idx = sh->qd_idx; in ops_run_compute6_2()
1733 for (i = disks; i-- ; ) { in ops_run_compute6_2()
1736 offs[count] = sh->dev[i].offset; in ops_run_compute6_2()
1737 blocks[count++] = sh->dev[i].page; in ops_run_compute6_2()
1739 dest = sh->dev[data_target].page; in ops_run_compute6_2()
1740 dest_off = sh->dev[data_target].offset; in ops_run_compute6_2()
1745 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_2()
1746 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1750 init_async_submit(&submit, ASYNC_TX_FENCE, tx, in ops_run_compute6_2()
1754 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1764 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1770 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1782 (unsigned long long)sh->sector); in ops_complete_prexor()
1784 if (r5c_is_writeback(sh->raid_conf->log)) in ops_complete_prexor()
1786 * raid5-cache write back uses orig_page during prexor. in ops_complete_prexor()
1794 struct dma_async_tx_descriptor *tx) in ops_run_prexor5() argument
1796 int disks = sh->disks; in ops_run_prexor5()
1799 int count = 0, pd_idx = sh->pd_idx, i; in ops_run_prexor5()
1803 unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_prexor5()
1804 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_prexor5()
1806 BUG_ON(sh->batch_head); in ops_run_prexor5()
1808 (unsigned long long)sh->sector); in ops_run_prexor5()
1810 for (i = disks; i--; ) { in ops_run_prexor5()
1811 struct r5dev *dev = &sh->dev[i]; in ops_run_prexor5()
1813 if (test_bit(R5_InJournal, &dev->flags)) { in ops_run_prexor5()
1818 off_srcs[count] = dev->offset; in ops_run_prexor5()
1819 xor_srcs[count++] = dev->orig_page; in ops_run_prexor5()
1820 } else if (test_bit(R5_Wantdrain, &dev->flags)) { in ops_run_prexor5()
1821 off_srcs[count] = dev->offset; in ops_run_prexor5()
1822 xor_srcs[count++] = dev->page; in ops_run_prexor5()
1826 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, in ops_run_prexor5()
1828 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_prexor5()
1829 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor5()
1831 return tx; in ops_run_prexor5()
1836 struct dma_async_tx_descriptor *tx) in ops_run_prexor6() argument
1844 (unsigned long long)sh->sector); in ops_run_prexor6()
1848 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, in ops_run_prexor6()
1850 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_prexor6()
1851 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor6()
1853 return tx; in ops_run_prexor6()
1857 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) in ops_run_biodrain() argument
1859 struct r5conf *conf = sh->raid_conf; in ops_run_biodrain() local
1860 int disks = sh->disks; in ops_run_biodrain()
1865 (unsigned long long)sh->sector); in ops_run_biodrain()
1867 for (i = disks; i--; ) { in ops_run_biodrain()
1872 if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) { in ops_run_biodrain()
1876 dev = &sh->dev[i]; in ops_run_biodrain()
1881 clear_bit(R5_InJournal, &dev->flags); in ops_run_biodrain()
1882 spin_lock_irq(&sh->stripe_lock); in ops_run_biodrain()
1883 chosen = dev->towrite; in ops_run_biodrain()
1884 dev->towrite = NULL; in ops_run_biodrain()
1885 sh->overwrite_disks = 0; in ops_run_biodrain()
1886 BUG_ON(dev->written); in ops_run_biodrain()
1887 wbi = dev->written = chosen; in ops_run_biodrain()
1888 spin_unlock_irq(&sh->stripe_lock); in ops_run_biodrain()
1889 WARN_ON(dev->page != dev->orig_page); in ops_run_biodrain()
1891 while (wbi && wbi->bi_iter.bi_sector < in ops_run_biodrain()
1892 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biodrain()
1893 if (wbi->bi_opf & REQ_FUA) in ops_run_biodrain()
1894 set_bit(R5_WantFUA, &dev->flags); in ops_run_biodrain()
1895 if (wbi->bi_opf & REQ_SYNC) in ops_run_biodrain()
1896 set_bit(R5_SyncIO, &dev->flags); in ops_run_biodrain()
1898 set_bit(R5_Discard, &dev->flags); in ops_run_biodrain()
1900 tx = async_copy_data(1, wbi, &dev->page, in ops_run_biodrain()
1901 dev->offset, in ops_run_biodrain()
1902 dev->sector, tx, sh, in ops_run_biodrain()
1903 r5c_is_writeback(conf->log)); in ops_run_biodrain()
1904 if (dev->page != dev->orig_page && in ops_run_biodrain()
1905 !r5c_is_writeback(conf->log)) { in ops_run_biodrain()
1906 set_bit(R5_SkipCopy, &dev->flags); in ops_run_biodrain()
1907 clear_bit(R5_UPTODATE, &dev->flags); in ops_run_biodrain()
1908 clear_bit(R5_OVERWRITE, &dev->flags); in ops_run_biodrain()
1911 wbi = r5_next_bio(conf, wbi, dev->sector); in ops_run_biodrain()
1914 if (head_sh->batch_head) { in ops_run_biodrain()
1915 sh = list_first_entry(&sh->batch_list, in ops_run_biodrain()
1925 return tx; in ops_run_biodrain()
1931 int disks = sh->disks; in ops_complete_reconstruct()
1932 int pd_idx = sh->pd_idx; in ops_complete_reconstruct()
1933 int qd_idx = sh->qd_idx; in ops_complete_reconstruct()
1938 (unsigned long long)sh->sector); in ops_complete_reconstruct()
1940 for (i = disks; i--; ) { in ops_complete_reconstruct()
1941 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); in ops_complete_reconstruct()
1942 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); in ops_complete_reconstruct()
1943 discard |= test_bit(R5_Discard, &sh->dev[i].flags); in ops_complete_reconstruct()
1946 for (i = disks; i--; ) { in ops_complete_reconstruct()
1947 struct r5dev *dev = &sh->dev[i]; in ops_complete_reconstruct()
1949 if (dev->written || i == pd_idx || i == qd_idx) { in ops_complete_reconstruct()
1950 if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { in ops_complete_reconstruct()
1951 set_bit(R5_UPTODATE, &dev->flags); in ops_complete_reconstruct()
1952 if (test_bit(STRIPE_EXPAND_READY, &sh->state)) in ops_complete_reconstruct()
1953 set_bit(R5_Expanded, &dev->flags); in ops_complete_reconstruct()
1956 set_bit(R5_WantFUA, &dev->flags); in ops_complete_reconstruct()
1958 set_bit(R5_SyncIO, &dev->flags); in ops_complete_reconstruct()
1962 if (sh->reconstruct_state == reconstruct_state_drain_run) in ops_complete_reconstruct()
1963 sh->reconstruct_state = reconstruct_state_drain_result; in ops_complete_reconstruct()
1964 else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) in ops_complete_reconstruct()
1965 sh->reconstruct_state = reconstruct_state_prexor_drain_result; in ops_complete_reconstruct()
1967 BUG_ON(sh->reconstruct_state != reconstruct_state_run); in ops_complete_reconstruct()
1968 sh->reconstruct_state = reconstruct_state_result; in ops_complete_reconstruct()
1971 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_reconstruct()
1977 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct5() argument
1979 int disks = sh->disks; in ops_run_reconstruct5()
1983 int count, pd_idx = sh->pd_idx, i; in ops_run_reconstruct5()
1993 (unsigned long long)sh->sector); in ops_run_reconstruct5()
1995 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct5()
1998 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct5()
2001 if (i >= sh->disks) { in ops_run_reconstruct5()
2002 atomic_inc(&sh->count); in ops_run_reconstruct5()
2003 set_bit(R5_Discard, &sh->dev[pd_idx].flags); in ops_run_reconstruct5()
2012 * that are part of a read-modify-write (written) in ops_run_reconstruct5()
2014 if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct5()
2016 off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2017 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2018 for (i = disks; i--; ) { in ops_run_reconstruct5()
2019 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2020 if (head_sh->dev[i].written || in ops_run_reconstruct5()
2021 test_bit(R5_InJournal, &head_sh->dev[i].flags)) { in ops_run_reconstruct5()
2022 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2023 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2027 xor_dest = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2028 off_dest = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2029 for (i = disks; i--; ) { in ops_run_reconstruct5()
2030 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2032 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2033 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2043 last_stripe = !head_sh->batch_head || in ops_run_reconstruct5()
2044 list_first_entry(&sh->batch_list, in ops_run_reconstruct5()
2050 atomic_inc(&head_sh->count); in ops_run_reconstruct5()
2051 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh, in ops_run_reconstruct5()
2055 init_async_submit(&submit, flags, tx, NULL, NULL, in ops_run_reconstruct5()
2060 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_reconstruct5()
2061 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2063 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_reconstruct5()
2064 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2067 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct5()
2075 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct6() argument
2086 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); in ops_run_reconstruct6()
2088 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct6()
2089 if (sh->pd_idx == i || sh->qd_idx == i) in ops_run_reconstruct6()
2091 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct6()
2094 if (i >= sh->disks) { in ops_run_reconstruct6()
2095 atomic_inc(&sh->count); in ops_run_reconstruct6()
2096 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in ops_run_reconstruct6()
2097 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in ops_run_reconstruct6()
2106 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct6()
2115 last_stripe = !head_sh->batch_head || in ops_run_reconstruct6()
2116 list_first_entry(&sh->batch_list, in ops_run_reconstruct6()
2120 atomic_inc(&head_sh->count); in ops_run_reconstruct6()
2121 init_async_submit(&submit, txflags, tx, ops_complete_reconstruct, in ops_run_reconstruct6()
2124 init_async_submit(&submit, 0, tx, NULL, NULL, in ops_run_reconstruct6()
2126 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_reconstruct6()
2127 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct6()
2130 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct6()
2141 (unsigned long long)sh->sector); in ops_complete_check()
2143 sh->check_state = check_state_check_result; in ops_complete_check()
2144 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_check()
2150 int disks = sh->disks; in ops_run_check_p()
2151 int pd_idx = sh->pd_idx; in ops_run_check_p()
2152 int qd_idx = sh->qd_idx; in ops_run_check_p()
2157 struct dma_async_tx_descriptor *tx; in ops_run_check_p() local
2163 (unsigned long long)sh->sector); in ops_run_check_p()
2165 BUG_ON(sh->batch_head); in ops_run_check_p()
2167 xor_dest = sh->dev[pd_idx].page; in ops_run_check_p()
2168 off_dest = sh->dev[pd_idx].offset; in ops_run_check_p()
2171 for (i = disks; i--; ) { in ops_run_check_p()
2174 off_srcs[count] = sh->dev[i].offset; in ops_run_check_p()
2175 xor_srcs[count++] = sh->dev[i].page; in ops_run_check_p()
2180 tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_check_p()
2181 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_p()
2182 &sh->ops.zero_sum_result, &submit); in ops_run_check_p()
2184 atomic_inc(&sh->count); in ops_run_check_p()
2185 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); in ops_run_check_p()
2186 tx = async_trigger_callback(&submit); in ops_run_check_p()
2197 (unsigned long long)sh->sector, checkp); in ops_run_check_pq()
2199 BUG_ON(sh->batch_head); in ops_run_check_pq()
2204 atomic_inc(&sh->count); in ops_run_check_pq()
2208 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_pq()
2209 &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); in ops_run_check_pq()
2214 int overlap_clear = 0, i, disks = sh->disks; in raid_run_ops()
2215 struct dma_async_tx_descriptor *tx = NULL; in raid_run_ops() local
2216 struct r5conf *conf = sh->raid_conf; in raid_run_ops() local
2217 int level = conf->level; in raid_run_ops()
2222 percpu = per_cpu_ptr(conf->percpu, cpu); in raid_run_ops()
2230 tx = ops_run_compute5(sh, percpu); in raid_run_ops()
2232 if (sh->ops.target2 < 0 || sh->ops.target < 0) in raid_run_ops()
2233 tx = ops_run_compute6_1(sh, percpu); in raid_run_ops()
2235 tx = ops_run_compute6_2(sh, percpu); in raid_run_ops()
2238 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) in raid_run_ops()
2239 async_tx_ack(tx); in raid_run_ops()
2244 tx = ops_run_prexor5(sh, percpu, tx); in raid_run_ops()
2246 tx = ops_run_prexor6(sh, percpu, tx); in raid_run_ops()
2250 tx = ops_run_partial_parity(sh, percpu, tx); in raid_run_ops()
2253 tx = ops_run_biodrain(sh, tx); in raid_run_ops()
2259 ops_run_reconstruct5(sh, percpu, tx); in raid_run_ops()
2261 ops_run_reconstruct6(sh, percpu, tx); in raid_run_ops()
2265 if (sh->check_state == check_state_run) in raid_run_ops()
2267 else if (sh->check_state == check_state_run_q) in raid_run_ops()
2269 else if (sh->check_state == check_state_run_pq) in raid_run_ops()
2275 if (overlap_clear && !sh->batch_head) in raid_run_ops()
2276 for (i = disks; i--; ) { in raid_run_ops()
2277 struct r5dev *dev = &sh->dev[i]; in raid_run_ops()
2278 if (test_and_clear_bit(R5_Overlap, &dev->flags)) in raid_run_ops()
2279 wake_up(&sh->raid_conf->wait_for_overlap); in raid_run_ops()
2287 kfree(sh->pages); in free_stripe()
2289 if (sh->ppl_page) in free_stripe()
2290 __free_page(sh->ppl_page); in free_stripe()
2295 int disks, struct r5conf *conf) in alloc_stripe() argument
2302 spin_lock_init(&sh->stripe_lock); in alloc_stripe()
2303 spin_lock_init(&sh->batch_lock); in alloc_stripe()
2304 INIT_LIST_HEAD(&sh->batch_list); in alloc_stripe()
2305 INIT_LIST_HEAD(&sh->lru); in alloc_stripe()
2306 INIT_LIST_HEAD(&sh->r5c); in alloc_stripe()
2307 INIT_LIST_HEAD(&sh->log_list); in alloc_stripe()
2308 atomic_set(&sh->count, 1); in alloc_stripe()
2309 sh->raid_conf = conf; in alloc_stripe()
2310 sh->log_start = MaxSector; in alloc_stripe()
2312 struct r5dev *dev = &sh->dev[i]; in alloc_stripe()
2314 bio_init(&dev->req, &dev->vec, 1); in alloc_stripe()
2315 bio_init(&dev->rreq, &dev->rvec, 1); in alloc_stripe()
2318 if (raid5_has_ppl(conf)) { in alloc_stripe()
2319 sh->ppl_page = alloc_page(gfp); in alloc_stripe()
2320 if (!sh->ppl_page) { in alloc_stripe()
2326 if (init_stripe_shared_pages(sh, conf, disks)) { in alloc_stripe()
2334 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) in grow_one_stripe() argument
2338 sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf); in grow_one_stripe()
2344 free_stripe(conf->slab_cache, sh); in grow_one_stripe()
2347 sh->hash_lock_index = in grow_one_stripe()
2348 conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; in grow_one_stripe()
2350 atomic_inc(&conf->active_stripes); in grow_one_stripe()
2353 conf->max_nr_stripes++; in grow_one_stripe()
2357 static int grow_stripes(struct r5conf *conf, int num) in grow_stripes() argument
2360 size_t namelen = sizeof(conf->cache_name[0]); in grow_stripes()
2361 int devs = max(conf->raid_disks, conf->previous_raid_disks); in grow_stripes()
2363 if (conf->mddev->gendisk) in grow_stripes()
2364 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2365 "raid%d-%s", conf->level, mdname(conf->mddev)); in grow_stripes()
2367 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2368 "raid%d-%p", conf->level, conf->mddev); in grow_stripes()
2369 snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); in grow_stripes()
2371 conf->active_name = 0; in grow_stripes()
2372 sc = kmem_cache_create(conf->cache_name[conf->active_name], in grow_stripes()
2373 sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), in grow_stripes()
2377 conf->slab_cache = sc; in grow_stripes()
2378 conf->pool_size = devs; in grow_stripes()
2379 while (num--) in grow_stripes()
2380 if (!grow_one_stripe(conf, GFP_KERNEL)) in grow_stripes()
2387 * scribble_alloc - allocate percpu scribble buffer for required size
2418 return -ENOMEM; in scribble_alloc()
2420 kvfree(percpu->scribble); in scribble_alloc()
2422 percpu->scribble = scribble; in scribble_alloc()
2423 percpu->scribble_obj_size = obj_size; in scribble_alloc()
2427 static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) in resize_chunks() argument
2437 if (conf->scribble_disks >= new_disks && in resize_chunks()
2438 conf->scribble_sectors >= new_sectors) in resize_chunks()
2440 mddev_suspend(conf->mddev); in resize_chunks()
2446 percpu = per_cpu_ptr(conf->percpu, cpu); in resize_chunks()
2448 new_sectors / RAID5_STRIPE_SECTORS(conf)); in resize_chunks()
2454 mddev_resume(conf->mddev); in resize_chunks()
2456 conf->scribble_disks = new_disks; in resize_chunks()
2457 conf->scribble_sectors = new_sectors; in resize_chunks()
2462 static int resize_stripes(struct r5conf *conf, int newsize) in resize_stripes() argument
2476 * 3/ reallocate conf->disks to be suitable bigger. If this fails, in resize_stripes()
2477 * we simply return a failure status - no need to clean anything up. in resize_stripes()
2495 md_allow_write(conf->mddev); in resize_stripes()
2498 sc = kmem_cache_create(conf->cache_name[1-conf->active_name], in resize_stripes()
2499 sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), in resize_stripes()
2502 return -ENOMEM; in resize_stripes()
2504 /* Need to ensure auto-resizing doesn't interfere */ in resize_stripes()
2505 mutex_lock(&conf->cache_size_mutex); in resize_stripes()
2507 for (i = conf->max_nr_stripes; i; i--) { in resize_stripes()
2508 nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); in resize_stripes()
2512 list_add(&nsh->lru, &newstripes); in resize_stripes()
2518 list_del(&nsh->lru); in resize_stripes()
2522 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2523 return -ENOMEM; in resize_stripes()
2525 /* Step 2 - Must use GFP_NOIO now. in resize_stripes()
2532 lock_device_hash_lock(conf, hash); in resize_stripes()
2533 wait_event_cmd(conf->wait_for_stripe, in resize_stripes()
2534 !list_empty(conf->inactive_list + hash), in resize_stripes()
2535 unlock_device_hash_lock(conf, hash), in resize_stripes()
2536 lock_device_hash_lock(conf, hash)); in resize_stripes()
2537 osh = get_free_stripe(conf, hash); in resize_stripes()
2538 unlock_device_hash_lock(conf, hash); in resize_stripes()
2541 for (i = 0; i < osh->nr_pages; i++) { in resize_stripes()
2542 nsh->pages[i] = osh->pages[i]; in resize_stripes()
2543 osh->pages[i] = NULL; in resize_stripes()
2546 for(i=0; i<conf->pool_size; i++) { in resize_stripes()
2547 nsh->dev[i].page = osh->dev[i].page; in resize_stripes()
2548 nsh->dev[i].orig_page = osh->dev[i].page; in resize_stripes()
2549 nsh->dev[i].offset = osh->dev[i].offset; in resize_stripes()
2551 nsh->hash_lock_index = hash; in resize_stripes()
2552 free_stripe(conf->slab_cache, osh); in resize_stripes()
2554 if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + in resize_stripes()
2555 !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { in resize_stripes()
2560 kmem_cache_destroy(conf->slab_cache); in resize_stripes()
2565 * conf->disks and the scribble region in resize_stripes()
2569 for (i = 0; i < conf->pool_size; i++) in resize_stripes()
2570 ndisks[i] = conf->disks[i]; in resize_stripes()
2572 for (i = conf->pool_size; i < newsize; i++) { in resize_stripes()
2575 err = -ENOMEM; in resize_stripes()
2579 for (i = conf->pool_size; i < newsize; i++) in resize_stripes()
2584 kfree(conf->disks); in resize_stripes()
2585 conf->disks = ndisks; in resize_stripes()
2588 err = -ENOMEM; in resize_stripes()
2590 conf->slab_cache = sc; in resize_stripes()
2591 conf->active_name = 1-conf->active_name; in resize_stripes()
2596 list_del_init(&nsh->lru); in resize_stripes()
2599 for (i = 0; i < nsh->nr_pages; i++) { in resize_stripes()
2600 if (nsh->pages[i]) in resize_stripes()
2602 nsh->pages[i] = alloc_page(GFP_NOIO); in resize_stripes()
2603 if (!nsh->pages[i]) in resize_stripes()
2604 err = -ENOMEM; in resize_stripes()
2607 for (i = conf->raid_disks; i < newsize; i++) { in resize_stripes()
2608 if (nsh->dev[i].page) in resize_stripes()
2610 nsh->dev[i].page = raid5_get_dev_page(nsh, i); in resize_stripes()
2611 nsh->dev[i].orig_page = nsh->dev[i].page; in resize_stripes()
2612 nsh->dev[i].offset = raid5_get_page_offset(nsh, i); in resize_stripes()
2615 for (i=conf->raid_disks; i < newsize; i++) in resize_stripes()
2616 if (nsh->dev[i].page == NULL) { in resize_stripes()
2618 nsh->dev[i].page = p; in resize_stripes()
2619 nsh->dev[i].orig_page = p; in resize_stripes()
2620 nsh->dev[i].offset = 0; in resize_stripes()
2622 err = -ENOMEM; in resize_stripes()
2630 conf->pool_size = newsize; in resize_stripes()
2631 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2636 static int drop_one_stripe(struct r5conf *conf) in drop_one_stripe() argument
2639 int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; in drop_one_stripe()
2641 spin_lock_irq(conf->hash_locks + hash); in drop_one_stripe()
2642 sh = get_free_stripe(conf, hash); in drop_one_stripe()
2643 spin_unlock_irq(conf->hash_locks + hash); in drop_one_stripe()
2646 BUG_ON(atomic_read(&sh->count)); in drop_one_stripe()
2648 free_stripe(conf->slab_cache, sh); in drop_one_stripe()
2649 atomic_dec(&conf->active_stripes); in drop_one_stripe()
2650 conf->max_nr_stripes--; in drop_one_stripe()
2654 static void shrink_stripes(struct r5conf *conf) in shrink_stripes() argument
2656 while (conf->max_nr_stripes && in shrink_stripes()
2657 drop_one_stripe(conf)) in shrink_stripes()
2660 kmem_cache_destroy(conf->slab_cache); in shrink_stripes()
2661 conf->slab_cache = NULL; in shrink_stripes()
2666 struct stripe_head *sh = bi->bi_private; in raid5_end_read_request()
2667 struct r5conf *conf = sh->raid_conf; in raid5_end_read_request() local
2668 int disks = sh->disks, i; in raid5_end_read_request()
2674 if (bi == &sh->dev[i].req) in raid5_end_read_request()
2678 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_read_request()
2679 bi->bi_status); in raid5_end_read_request()
2685 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2691 rdev = conf->disks[i].replacement; in raid5_end_read_request()
2693 rdev = conf->disks[i].rdev; in raid5_end_read_request()
2695 if (use_new_offset(conf, sh)) in raid5_end_read_request()
2696 s = sh->sector + rdev->new_data_offset; in raid5_end_read_request()
2698 s = sh->sector + rdev->data_offset; in raid5_end_read_request()
2699 if (!bi->bi_status) { in raid5_end_read_request()
2700 set_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2701 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in raid5_end_read_request()
2708 mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf), in raid5_end_read_request()
2710 bdevname(rdev->bdev, b)); in raid5_end_read_request()
2711 atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors); in raid5_end_read_request()
2712 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2713 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2714 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2715 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2717 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in raid5_end_read_request()
2722 set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); in raid5_end_read_request()
2724 if (atomic_read(&rdev->read_errors)) in raid5_end_read_request()
2725 atomic_set(&rdev->read_errors, 0); in raid5_end_read_request()
2727 const char *bdn = bdevname(rdev->bdev, b); in raid5_end_read_request()
2731 clear_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2732 if (!(bi->bi_status == BLK_STS_PROTECTION)) in raid5_end_read_request()
2733 atomic_inc(&rdev->read_errors); in raid5_end_read_request()
2734 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2737 mdname(conf->mddev), in raid5_end_read_request()
2740 else if (conf->mddev->degraded >= conf->max_degraded) { in raid5_end_read_request()
2744 mdname(conf->mddev), in raid5_end_read_request()
2747 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { in raid5_end_read_request()
2752 mdname(conf->mddev), in raid5_end_read_request()
2755 } else if (atomic_read(&rdev->read_errors) in raid5_end_read_request()
2756 > conf->max_nr_stripes) { in raid5_end_read_request()
2757 if (!test_bit(Faulty, &rdev->flags)) { in raid5_end_read_request()
2759 mdname(conf->mddev), in raid5_end_read_request()
2760 atomic_read(&rdev->read_errors), in raid5_end_read_request()
2761 conf->max_nr_stripes); in raid5_end_read_request()
2763 mdname(conf->mddev), bdn); in raid5_end_read_request()
2767 if (set_bad && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2768 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2771 if (sh->qd_idx >= 0 && sh->pd_idx == i) in raid5_end_read_request()
2772 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2773 else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { in raid5_end_read_request()
2774 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2775 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2777 set_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2779 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2780 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2782 && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2784 rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0))) in raid5_end_read_request()
2785 md_error(conf->mddev, rdev); in raid5_end_read_request()
2788 rdev_dec_pending(rdev, conf->mddev); in raid5_end_read_request()
2790 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_read_request()
2791 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_read_request()
2797 struct stripe_head *sh = bi->bi_private; in raid5_end_write_request()
2798 struct r5conf *conf = sh->raid_conf; in raid5_end_write_request() local
2799 int disks = sh->disks, i; in raid5_end_write_request()
2806 if (bi == &sh->dev[i].req) { in raid5_end_write_request()
2807 rdev = conf->disks[i].rdev; in raid5_end_write_request()
2810 if (bi == &sh->dev[i].rreq) { in raid5_end_write_request()
2811 rdev = conf->disks[i].replacement; in raid5_end_write_request()
2819 rdev = conf->disks[i].rdev; in raid5_end_write_request()
2824 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_write_request()
2825 bi->bi_status); in raid5_end_write_request()
2833 if (bi->bi_status) in raid5_end_write_request()
2834 md_error(conf->mddev, rdev); in raid5_end_write_request()
2835 else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2836 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2838 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); in raid5_end_write_request()
2840 if (bi->bi_status) { in raid5_end_write_request()
2841 set_bit(STRIPE_DEGRADED, &sh->state); in raid5_end_write_request()
2842 set_bit(WriteErrorSeen, &rdev->flags); in raid5_end_write_request()
2843 set_bit(R5_WriteError, &sh->dev[i].flags); in raid5_end_write_request()
2844 if (!test_and_set_bit(WantReplacement, &rdev->flags)) in raid5_end_write_request()
2846 &rdev->mddev->recovery); in raid5_end_write_request()
2847 } else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2848 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2850 set_bit(R5_MadeGood, &sh->dev[i].flags); in raid5_end_write_request()
2851 if (test_bit(R5_ReadError, &sh->dev[i].flags)) in raid5_end_write_request()
2854 * a re-write. in raid5_end_write_request()
2856 set_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_write_request()
2859 rdev_dec_pending(rdev, conf->mddev); in raid5_end_write_request()
2861 if (sh->batch_head && bi->bi_status && !replacement) in raid5_end_write_request()
2862 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); in raid5_end_write_request()
2865 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) in raid5_end_write_request()
2866 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_write_request()
2867 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_write_request()
2869 if (sh->batch_head && sh != sh->batch_head) in raid5_end_write_request()
2870 raid5_release_stripe(sh->batch_head); in raid5_end_write_request()
2877 struct r5conf *conf = mddev->private; in raid5_error() local
2882 mdname(mddev), bdevname(rdev->bdev, b)); in raid5_error()
2884 spin_lock_irqsave(&conf->device_lock, flags); in raid5_error()
2885 set_bit(Faulty, &rdev->flags); in raid5_error()
2886 clear_bit(In_sync, &rdev->flags); in raid5_error()
2887 mddev->degraded = raid5_calc_degraded(conf); in raid5_error()
2889 if (has_failed(conf)) { in raid5_error()
2890 set_bit(MD_BROKEN, &conf->mddev->flags); in raid5_error()
2891 conf->recovery_disabled = mddev->recovery_disabled; in raid5_error()
2894 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_error()
2897 mdname(mddev), conf->raid_disks - mddev->degraded); in raid5_error()
2900 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_error()
2901 set_bit(MD_RECOVERY_INTR, &mddev->recovery); in raid5_error()
2903 set_bit(Blocked, &rdev->flags); in raid5_error()
2904 set_mask_bits(&mddev->sb_flags, 0, in raid5_error()
2913 sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, in raid5_compute_sector() argument
2923 int algorithm = previous ? conf->prev_algo in raid5_compute_sector()
2924 : conf->algorithm; in raid5_compute_sector()
2925 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_sector()
2926 : conf->chunk_sectors; in raid5_compute_sector()
2927 int raid_disks = previous ? conf->previous_raid_disks in raid5_compute_sector()
2928 : conf->raid_disks; in raid5_compute_sector()
2929 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_sector()
2948 pd_idx = qd_idx = -1; in raid5_compute_sector()
2949 switch(conf->level) { in raid5_compute_sector()
2956 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
2966 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
2988 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
2990 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
2999 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3006 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3032 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3046 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3048 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3058 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3059 qd_idx = (pd_idx + raid_disks - 1) % raid_disks; in raid5_compute_sector()
3066 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3069 qd_idx = raid_disks - 1; in raid5_compute_sector()
3073 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3076 qd_idx = raid_disks - 1; in raid5_compute_sector()
3080 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3081 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3082 qd_idx = raid_disks - 1; in raid5_compute_sector()
3086 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3087 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3088 qd_idx = raid_disks - 1; in raid5_compute_sector()
3094 qd_idx = raid_disks - 1; in raid5_compute_sector()
3104 sh->pd_idx = pd_idx; in raid5_compute_sector()
3105 sh->qd_idx = qd_idx; in raid5_compute_sector()
3106 sh->ddf_layout = ddf_layout; in raid5_compute_sector()
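For orientation, a worked example of the mapping performed above, restricted to the left-symmetric layout (md's default for raid5). The 4-disk array, the 8-sector chunk and map_left_symmetric() are illustrative assumptions; the real function additionally handles the other algorithms, raid6 Q placement and the "previous" geometry used during reshape.

#include <stdio.h>

#define RAID_DISKS	4
#define DATA_DISKS	(RAID_DISKS - 1)
#define CHUNK_SECTORS	8

static void map_left_symmetric(unsigned long long r_sector)
{
	unsigned long long chunk = r_sector / CHUNK_SECTORS;
	unsigned int chunk_offset = r_sector % CHUNK_SECTORS;
	unsigned int dd_idx = chunk % DATA_DISKS;	/* logical data slot */
	unsigned long long stripe = chunk / DATA_DISKS;
	unsigned int pd_idx = DATA_DISKS - stripe % RAID_DISKS; /* parity disk */
	unsigned long long new_sector = stripe * CHUNK_SECTORS + chunk_offset;

	dd_idx = (pd_idx + 1 + dd_idx) % RAID_DISKS;	/* physical data disk */
	printf("lba %llu -> disk %u, sector %llu (parity on disk %u)\n",
	       r_sector, dd_idx, new_sector, pd_idx);
}

int main(void)
{
	map_left_symmetric(50);	/* -> disk 2, sector 18, parity on disk 1 */
	return 0;
}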
3117 struct r5conf *conf = sh->raid_conf; in raid5_compute_blocknr() local
3118 int raid_disks = sh->disks; in raid5_compute_blocknr()
3119 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_blocknr()
3120 sector_t new_sector = sh->sector, check; in raid5_compute_blocknr()
3121 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_blocknr()
3122 : conf->chunk_sectors; in raid5_compute_blocknr()
3123 int algorithm = previous ? conf->prev_algo in raid5_compute_blocknr()
3124 : conf->algorithm; in raid5_compute_blocknr()
3135 if (i == sh->pd_idx) in raid5_compute_blocknr()
3137 switch(conf->level) { in raid5_compute_blocknr()
3143 if (i > sh->pd_idx) in raid5_compute_blocknr()
3144 i--; in raid5_compute_blocknr()
3148 if (i < sh->pd_idx) in raid5_compute_blocknr()
3150 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3153 i -= 1; in raid5_compute_blocknr()
3162 if (i == sh->qd_idx) in raid5_compute_blocknr()
3169 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3170 i--; /* Q D D D P */ in raid5_compute_blocknr()
3171 else if (i > sh->pd_idx) in raid5_compute_blocknr()
3172 i -= 2; /* D D P Q D */ in raid5_compute_blocknr()
3176 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3177 i--; /* Q D D D P */ in raid5_compute_blocknr()
3180 if (i < sh->pd_idx) in raid5_compute_blocknr()
3182 i -= (sh->pd_idx + 2); in raid5_compute_blocknr()
3186 i -= 2; in raid5_compute_blocknr()
3192 if (sh->pd_idx == 0) in raid5_compute_blocknr()
3193 i--; /* P D D D Q */ in raid5_compute_blocknr()
3196 if (i < sh->pd_idx) in raid5_compute_blocknr()
3198 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3203 if (i > sh->pd_idx) in raid5_compute_blocknr()
3204 i--; in raid5_compute_blocknr()
3208 if (i < sh->pd_idx) in raid5_compute_blocknr()
3210 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3213 i -= 1; in raid5_compute_blocknr()
3224 check = raid5_compute_sector(conf, r_sector, in raid5_compute_blocknr()
3226 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx in raid5_compute_blocknr()
3227 || sh2.qd_idx != sh->qd_idx) { in raid5_compute_blocknr()
3229 mdname(conf->mddev)); in raid5_compute_blocknr()
3242 * 1. degraded stripe has a non-overwrite to the missing dev, AND this
3245 * In this case, when reading data for the non-overwrite dev, it is
3253 * It is important to be able to flush all stripes in raid5-cache.
3256 * stripe, we need to reserve (conf->raid_disks + 1) pages per stripe
3258 * operation, we only need (conf->max_degraded + 1) pages per stripe.
3269 * based on data in stripe cache. The array is read-only to upper
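To make the reservation noted in the comment above concrete (the array size is chosen purely for illustration): on an 8-device raid6, conf->max_degraded is 2, so a journal flush that excludes the stripe's pending writes needs to reserve max_degraded + 1 = 3 pages per stripe, while a flush that includes them would need raid_disks + 1 = 9 pages per stripe, which is why excluding pending writes keeps the reservation small.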
3273 static inline bool delay_towrite(struct r5conf *conf, in delay_towrite() argument
3278 if (!test_bit(R5_OVERWRITE, &dev->flags) && in delay_towrite()
3279 !test_bit(R5_Insync, &dev->flags) && s->injournal) in delay_towrite()
3282 if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && in delay_towrite()
3283 s->injournal > 0) in delay_towrite()
3286 if (s->log_failed && s->injournal) in delay_towrite()
3295 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks; in schedule_reconstruction()
3296 struct r5conf *conf = sh->raid_conf; in schedule_reconstruction() local
3297 int level = conf->level; in schedule_reconstruction()
3308 for (i = disks; i--; ) { in schedule_reconstruction()
3309 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3311 if (dev->towrite && !delay_towrite(conf, dev, s)) { in schedule_reconstruction()
3312 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3313 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3315 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3316 s->locked++; in schedule_reconstruction()
3317 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3318 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3319 s->locked++; in schedule_reconstruction()
3327 if (!s->locked) in schedule_reconstruction()
3330 sh->reconstruct_state = reconstruct_state_drain_run; in schedule_reconstruction()
3331 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3333 sh->reconstruct_state = reconstruct_state_run; in schedule_reconstruction()
3335 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3337 if (s->locked + conf->max_degraded == disks) in schedule_reconstruction()
3338 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) in schedule_reconstruction()
3339 atomic_inc(&conf->pending_full_writes); in schedule_reconstruction()
3341 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || in schedule_reconstruction()
3342 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); in schedule_reconstruction()
3344 (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) || in schedule_reconstruction()
3345 test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags)))); in schedule_reconstruction()
3347 for (i = disks; i--; ) { in schedule_reconstruction()
3348 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3352 if (dev->towrite && in schedule_reconstruction()
3353 (test_bit(R5_UPTODATE, &dev->flags) || in schedule_reconstruction()
3354 test_bit(R5_Wantcompute, &dev->flags))) { in schedule_reconstruction()
3355 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3356 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3357 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3358 s->locked++; in schedule_reconstruction()
3359 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3360 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3361 s->locked++; in schedule_reconstruction()
3364 if (!s->locked) in schedule_reconstruction()
3365 /* False alarm - nothing to do */ in schedule_reconstruction()
3367 sh->reconstruct_state = reconstruct_state_prexor_drain_run; in schedule_reconstruction()
3368 set_bit(STRIPE_OP_PREXOR, &s->ops_request); in schedule_reconstruction()
3369 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3370 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3376 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3377 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3378 s->locked++; in schedule_reconstruction()
3381 int qd_idx = sh->qd_idx; in schedule_reconstruction()
3382 struct r5dev *dev = &sh->dev[qd_idx]; in schedule_reconstruction()
3384 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3385 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3386 s->locked++; in schedule_reconstruction()
3389 if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && in schedule_reconstruction()
3390 test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && in schedule_reconstruction()
3391 !test_bit(STRIPE_FULL_WRITE, &sh->state) && in schedule_reconstruction()
3392 test_bit(R5_Insync, &sh->dev[pd_idx].flags)) in schedule_reconstruction()
3393 set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request); in schedule_reconstruction()
3396 __func__, (unsigned long long)sh->sector, in schedule_reconstruction()
3397 s->locked, s->ops_request); in schedule_reconstruction()
3409 struct r5conf *conf = sh->raid_conf; in add_stripe_bio() local
3413 (unsigned long long)bi->bi_iter.bi_sector, in add_stripe_bio()
3414 (unsigned long long)sh->sector); in add_stripe_bio()
3416 spin_lock_irq(&sh->stripe_lock); in add_stripe_bio()
3417 sh->dev[dd_idx].write_hint = bi->bi_write_hint; in add_stripe_bio()
3419 if (sh->batch_head) in add_stripe_bio()
3422 bip = &sh->dev[dd_idx].towrite; in add_stripe_bio()
3426 bip = &sh->dev[dd_idx].toread; in add_stripe_bio()
3427 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) { in add_stripe_bio()
3428 if (bio_end_sector(*bip) > bi->bi_iter.bi_sector) in add_stripe_bio()
3430 bip = & (*bip)->bi_next; in add_stripe_bio()
3432 if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) in add_stripe_bio()
3435 if (forwrite && raid5_has_ppl(conf)) { in add_stripe_bio()
3449 for (i = 0; i < sh->disks; i++) { in add_stripe_bio()
3450 if (i != sh->pd_idx && in add_stripe_bio()
3451 (i == dd_idx || sh->dev[i].towrite)) { in add_stripe_bio()
3452 sector = sh->dev[i].sector; in add_stripe_bio()
3461 if (first + conf->chunk_sectors * (count - 1) != last) in add_stripe_bio()
3466 clear_bit(STRIPE_BATCH_READY, &sh->state); in add_stripe_bio()
3468 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); in add_stripe_bio()
3470 bi->bi_next = *bip; in add_stripe_bio()
3473 md_write_inc(conf->mddev, bi); in add_stripe_bio()
3477 sector_t sector = sh->dev[dd_idx].sector; in add_stripe_bio()
3478 for (bi=sh->dev[dd_idx].towrite; in add_stripe_bio()
3479 sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) && in add_stripe_bio()
3480 bi && bi->bi_iter.bi_sector <= sector; in add_stripe_bio()
3481 bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) { in add_stripe_bio()
3485 if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf)) in add_stripe_bio()
3486 if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags)) in add_stripe_bio()
3487 sh->overwrite_disks++; in add_stripe_bio()
3491 (unsigned long long)(*bip)->bi_iter.bi_sector, in add_stripe_bio()
3492 (unsigned long long)sh->sector, dd_idx); in add_stripe_bio()
3494 if (conf->mddev->bitmap && firstwrite) { in add_stripe_bio()
3507 set_bit(STRIPE_BITMAP_PENDING, &sh->state); in add_stripe_bio()
3508 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3509 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, in add_stripe_bio()
3510 RAID5_STRIPE_SECTORS(conf), 0); in add_stripe_bio()
3511 spin_lock_irq(&sh->stripe_lock); in add_stripe_bio()
3512 clear_bit(STRIPE_BITMAP_PENDING, &sh->state); in add_stripe_bio()
3513 if (!sh->batch_head) { in add_stripe_bio()
3514 sh->bm_seq = conf->seq_flush+1; in add_stripe_bio()
3515 set_bit(STRIPE_BIT_DELAY, &sh->state); in add_stripe_bio()
3518 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3521 stripe_add_to_batch_list(conf, sh); in add_stripe_bio()
3525 set_bit(R5_Overlap, &sh->dev[dd_idx].flags); in add_stripe_bio()
3526 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
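A compact, self-contained sketch of the sorted, overlap-rejecting insert performed above on a device's ->towrite / ->toread bio list: entries stay ordered by start sector and any overlap sends the caller down the R5_Overlap path instead. Plain sector ranges replace struct bio and there is no locking; struct range and insert_sorted() are illustrative names, not from raid5.c.

#include <stdbool.h>
#include <stddef.h>

struct range {
	unsigned long long start, end;	/* [start, end) in sectors */
	struct range *next;
};

/* Returns false (caller must handle the overlap) without modifying the list. */
static bool insert_sorted(struct range **head, struct range *r)
{
	struct range **pos = head;

	while (*pos && (*pos)->start < r->start) {
		if ((*pos)->end > r->start)	/* earlier entry overlaps us */
			return false;
		pos = &(*pos)->next;
	}
	if (*pos && (*pos)->start < r->end)	/* next entry overlaps us */
		return false;

	r->next = *pos;				/* splice in; list stays sorted */
	*pos = r;
	return true;
}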
3530 static void end_reshape(struct r5conf *conf);
3532 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, in stripe_set_idx() argument
3536 previous ? conf->prev_chunk_sectors : conf->chunk_sectors; in stripe_set_idx()
3539 int disks = previous ? conf->previous_raid_disks : conf->raid_disks; in stripe_set_idx()
3541 raid5_compute_sector(conf, in stripe_set_idx()
3542 stripe * (disks - conf->max_degraded) in stripe_set_idx()
3549 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, in handle_failed_stripe() argument
3553 BUG_ON(sh->batch_head); in handle_failed_stripe()
3554 for (i = disks; i--; ) { in handle_failed_stripe()
3558 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in handle_failed_stripe()
3561 rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_stripe()
3562 if (rdev && test_bit(In_sync, &rdev->flags) && in handle_failed_stripe()
3563 !test_bit(Faulty, &rdev->flags)) in handle_failed_stripe()
3564 atomic_inc(&rdev->nr_pending); in handle_failed_stripe()
3571 sh->sector, in handle_failed_stripe()
3572 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_stripe()
3573 md_error(conf->mddev, rdev); in handle_failed_stripe()
3574 rdev_dec_pending(rdev, conf->mddev); in handle_failed_stripe()
3577 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3579 bi = sh->dev[i].towrite; in handle_failed_stripe()
3580 sh->dev[i].towrite = NULL; in handle_failed_stripe()
3581 sh->overwrite_disks = 0; in handle_failed_stripe()
3582 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3588 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3589 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3591 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3592 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3593 struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3595 md_write_end(conf->mddev); in handle_failed_stripe()
3600 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3601 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3604 bi = sh->dev[i].written; in handle_failed_stripe()
3605 sh->dev[i].written = NULL; in handle_failed_stripe()
3606 if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { in handle_failed_stripe()
3607 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_failed_stripe()
3608 sh->dev[i].page = sh->dev[i].orig_page; in handle_failed_stripe()
3612 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3613 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3614 struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3616 md_write_end(conf->mddev); in handle_failed_stripe()
3621 /* fail any reads if this device is non-operational and in handle_failed_stripe()
3624 if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && in handle_failed_stripe()
3625 s->failed > conf->max_degraded && in handle_failed_stripe()
3626 (!test_bit(R5_Insync, &sh->dev[i].flags) || in handle_failed_stripe()
3627 test_bit(R5_ReadError, &sh->dev[i].flags))) { in handle_failed_stripe()
3628 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3629 bi = sh->dev[i].toread; in handle_failed_stripe()
3630 sh->dev[i].toread = NULL; in handle_failed_stripe()
3631 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3632 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3633 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3635 s->to_read--; in handle_failed_stripe()
3636 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3637 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3639 r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3646 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3647 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3649 * still be locked - so just clear all R5_LOCKED flags in handle_failed_stripe()
3651 clear_bit(R5_LOCKED, &sh->dev[i].flags); in handle_failed_stripe()
3653 s->to_write = 0; in handle_failed_stripe()
3654 s->written = 0; in handle_failed_stripe()
3656 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_failed_stripe()
3657 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_failed_stripe()
3658 md_wakeup_thread(conf->mddev->thread); in handle_failed_stripe()
3662 handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, in handle_failed_sync() argument
3668 BUG_ON(sh->batch_head); in handle_failed_sync()
3669 clear_bit(STRIPE_SYNCING, &sh->state); in handle_failed_sync()
3670 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_failed_sync()
3671 wake_up(&conf->wait_for_overlap); in handle_failed_sync()
3672 s->syncing = 0; in handle_failed_sync()
3673 s->replacing = 0; in handle_failed_sync()
3679 * non-sync devices, or abort the recovery in handle_failed_sync()
3681 if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { in handle_failed_sync()
3686 for (i = 0; i < conf->raid_disks; i++) { in handle_failed_sync()
3687 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_sync()
3689 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3690 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3691 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3692 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3694 rdev = rcu_dereference(conf->disks[i].replacement); in handle_failed_sync()
3696 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3697 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3698 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3699 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3704 conf->recovery_disabled = in handle_failed_sync()
3705 conf->mddev->recovery_disabled; in handle_failed_sync()
3707 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort); in handle_failed_sync()
3716 rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement); in want_replace()
3718 && !test_bit(Faulty, &rdev->flags) in want_replace()
3719 && !test_bit(In_sync, &rdev->flags) in want_replace()
3720 && (rdev->recovery_offset <= sh->sector in want_replace()
3721 || rdev->mddev->recovery_cp <= sh->sector)) in want_replace()
3730 struct r5dev *dev = &sh->dev[disk_idx]; in need_this_block()
3731 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], in need_this_block()
3732 &sh->dev[s->failed_num[1]] }; in need_this_block()
3734 bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW); in need_this_block()
3737 if (test_bit(R5_LOCKED, &dev->flags) || in need_this_block()
3738 test_bit(R5_UPTODATE, &dev->flags)) in need_this_block()
3744 if (dev->toread || in need_this_block()
3745 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))) in need_this_block()
3749 if (s->syncing || s->expanding || in need_this_block()
3750 (s->replacing && want_replace(sh, disk_idx))) in need_this_block()
3756 if ((s->failed >= 1 && fdev[0]->toread) || in need_this_block()
3757 (s->failed >= 2 && fdev[1]->toread)) in need_this_block()
3763 /* Sometimes neither read-modify-write nor reconstruct-write in need_this_block()
3765 * can. Then the parity-update is certain to have enough to in need_this_block()
3771 if (!s->failed || !s->to_write) in need_this_block()
3774 if (test_bit(R5_Insync, &dev->flags) && in need_this_block()
3775 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in need_this_block()
3776 /* Pre-reads are not permitted until after a short delay in need_this_block()
3783 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3784 if (fdev[i]->towrite && in need_this_block()
3785 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3786 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3794 if (s->failed >= 2 && in need_this_block()
3795 (fdev[i]->towrite || in need_this_block()
3796 s->failed_num[i] == sh->pd_idx || in need_this_block()
3797 s->failed_num[i] == sh->qd_idx) && in need_this_block()
3798 !test_bit(R5_UPTODATE, &fdev[i]->flags)) in need_this_block()
3801 * reconstruct-write. in need_this_block()
3806 /* If we are forced to do a reconstruct-write, because parity in need_this_block()
3814 sh->sector < sh->raid_conf->mddev->recovery_cp) in need_this_block()
3815 /* reconstruct-write isn't being forced */ in need_this_block()
3817 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3818 if (s->failed_num[i] != sh->pd_idx && in need_this_block()
3819 s->failed_num[i] != sh->qd_idx && in need_this_block()
3820 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3821 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3828 /* fetch_block - checks the given member device to see if its data needs
3837 struct r5dev *dev = &sh->dev[disk_idx]; in fetch_block()
3844 BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); in fetch_block()
3845 BUG_ON(test_bit(R5_Wantread, &dev->flags)); in fetch_block()
3846 BUG_ON(sh->batch_head); in fetch_block()
3849 * In the raid6 case if the only non-uptodate disk is P in fetch_block()
3851 * drives. It is safe to compute rather than re-read P. in fetch_block()
3857 if ((s->uptodate == disks - 1) && in fetch_block()
3858 ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) || in fetch_block()
3859 (s->failed && (disk_idx == s->failed_num[0] || in fetch_block()
3860 disk_idx == s->failed_num[1])))) { in fetch_block()
3865 (unsigned long long)sh->sector, disk_idx); in fetch_block()
3866 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
3867 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
3868 set_bit(R5_Wantcompute, &dev->flags); in fetch_block()
3869 sh->ops.target = disk_idx; in fetch_block()
3870 sh->ops.target2 = -1; /* no 2nd target */ in fetch_block()
3871 s->req_compute = 1; in fetch_block()
3878 s->uptodate++; in fetch_block()
3880 } else if (s->uptodate == disks-2 && s->failed >= 2) { in fetch_block()
3881 /* Computing 2-failure is *very* expensive; only in fetch_block()
3885 for (other = disks; other--; ) { in fetch_block()
3889 &sh->dev[other].flags)) in fetch_block()
3894 (unsigned long long)sh->sector, in fetch_block()
3896 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
3897 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
3898 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); in fetch_block()
3899 set_bit(R5_Wantcompute, &sh->dev[other].flags); in fetch_block()
3900 sh->ops.target = disk_idx; in fetch_block()
3901 sh->ops.target2 = other; in fetch_block()
3902 s->uptodate += 2; in fetch_block()
3903 s->req_compute = 1; in fetch_block()
3905 } else if (test_bit(R5_Insync, &dev->flags)) { in fetch_block()
3906 set_bit(R5_LOCKED, &dev->flags); in fetch_block()
3907 set_bit(R5_Wantread, &dev->flags); in fetch_block()
3908 s->locked++; in fetch_block()
3910 disk_idx, s->syncing); in fetch_block()
3918 * handle_stripe_fill - read or compute data to satisfy pending requests.
3930 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && in handle_stripe_fill()
3931 !sh->reconstruct_state) { in handle_stripe_fill()
3940 if (s->to_read && s->injournal && s->failed) { in handle_stripe_fill()
3941 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in handle_stripe_fill()
3946 for (i = disks; i--; ) in handle_stripe_fill()
3951 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_fill()
3961 static void handle_stripe_clean_event(struct r5conf *conf, in handle_stripe_clean_event() argument
3970 for (i = disks; i--; ) in handle_stripe_clean_event()
3971 if (sh->dev[i].written) { in handle_stripe_clean_event()
3972 dev = &sh->dev[i]; in handle_stripe_clean_event()
3973 if (!test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_clean_event()
3974 (test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_clean_event()
3975 test_bit(R5_Discard, &dev->flags) || in handle_stripe_clean_event()
3976 test_bit(R5_SkipCopy, &dev->flags))) { in handle_stripe_clean_event()
3980 if (test_and_clear_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
3981 clear_bit(R5_UPTODATE, &dev->flags); in handle_stripe_clean_event()
3982 if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { in handle_stripe_clean_event()
3983 WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); in handle_stripe_clean_event()
3988 dev->page = dev->orig_page; in handle_stripe_clean_event()
3989 wbi = dev->written; in handle_stripe_clean_event()
3990 dev->written = NULL; in handle_stripe_clean_event()
3991 while (wbi && wbi->bi_iter.bi_sector < in handle_stripe_clean_event()
3992 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in handle_stripe_clean_event()
3993 wbi2 = r5_next_bio(conf, wbi, dev->sector); in handle_stripe_clean_event()
3994 md_write_end(conf->mddev); in handle_stripe_clean_event()
3998 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_stripe_clean_event()
3999 RAID5_STRIPE_SECTORS(conf), in handle_stripe_clean_event()
4000 !test_bit(STRIPE_DEGRADED, &sh->state), in handle_stripe_clean_event()
4002 if (head_sh->batch_head) { in handle_stripe_clean_event()
4003 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4007 dev = &sh->dev[i]; in handle_stripe_clean_event()
4012 dev = &sh->dev[i]; in handle_stripe_clean_event()
4013 } else if (test_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
4020 test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_clean_event()
4022 clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4023 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4024 if (sh->qd_idx >= 0) { in handle_stripe_clean_event()
4025 clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4026 clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4029 clear_bit(STRIPE_DISCARD, &sh->state); in handle_stripe_clean_event()
4036 hash = sh->hash_lock_index; in handle_stripe_clean_event()
4037 spin_lock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4039 spin_unlock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4040 if (head_sh->batch_head) { in handle_stripe_clean_event()
4041 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4048 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) in handle_stripe_clean_event()
4049 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_clean_event()
4053 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_stripe_clean_event()
4054 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_stripe_clean_event()
4055 md_wakeup_thread(conf->mddev->thread); in handle_stripe_clean_event()
4057 if (head_sh->batch_head && do_endio) in handle_stripe_clean_event()
4063 * old data. This page is stored in dev->orig_page.
4071 return (test_bit(R5_UPTODATE, &dev->flags)) && in uptodate_for_rmw()
4072 (!test_bit(R5_InJournal, &dev->flags) || in uptodate_for_rmw()
4073 test_bit(R5_OrigPageUPTDODATE, &dev->flags)); in uptodate_for_rmw()
4076 static int handle_stripe_dirtying(struct r5conf *conf, in handle_stripe_dirtying() argument
4082 sector_t recovery_cp = conf->mddev->recovery_cp; in handle_stripe_dirtying()
4087 * In this case, we need to always do reconstruct-write, to ensure in handle_stripe_dirtying()
4088 * that in case of drive failure or read-error correction, we in handle_stripe_dirtying()
4091 if (conf->rmw_level == PARITY_DISABLE_RMW || in handle_stripe_dirtying()
4092 (recovery_cp < MaxSector && sh->sector >= recovery_cp && in handle_stripe_dirtying()
4093 s->failed == 0)) { in handle_stripe_dirtying()
4094 /* Calculate the real rcw later - for now make it in handle_stripe_dirtying()
4098 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", in handle_stripe_dirtying()
4099 conf->rmw_level, (unsigned long long)recovery_cp, in handle_stripe_dirtying()
4100 (unsigned long long)sh->sector); in handle_stripe_dirtying()
4101 } else for (i = disks; i--; ) { in handle_stripe_dirtying()
4103 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4104 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4105 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4106 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4107 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4109 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4110 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4116 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4117 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4118 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4119 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4120 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4121 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4129 (unsigned long long)sh->sector, sh->state, rmw, rcw); in handle_stripe_dirtying()
4130 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_dirtying()
4131 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { in handle_stripe_dirtying()
4132 /* prefer read-modify-write, but need to get some data */ in handle_stripe_dirtying()
4133 if (conf->mddev->queue) in handle_stripe_dirtying()
4134 blk_add_trace_msg(conf->mddev->queue, in handle_stripe_dirtying()
4136 (unsigned long long)sh->sector, rmw); in handle_stripe_dirtying()
4137 for (i = disks; i--; ) { in handle_stripe_dirtying()
4138 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4139 if (test_bit(R5_InJournal, &dev->flags) && in handle_stripe_dirtying()
4140 dev->page == dev->orig_page && in handle_stripe_dirtying()
4141 !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_dirtying()
4146 dev->orig_page = p; in handle_stripe_dirtying()
4152 * disk_info->extra_page in handle_stripe_dirtying()
4155 &conf->cache_state)) { in handle_stripe_dirtying()
4161 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4162 s->waiting_extra_page = 1; in handle_stripe_dirtying()
4163 return -EAGAIN; in handle_stripe_dirtying()
4167 for (i = disks; i--; ) { in handle_stripe_dirtying()
4168 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4169 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4170 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4171 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4172 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4174 test_bit(R5_Wantcompute, &dev->flags)) && in handle_stripe_dirtying()
4175 test_bit(R5_Insync, &dev->flags)) { in handle_stripe_dirtying()
4177 &sh->state)) { in handle_stripe_dirtying()
4178 pr_debug("Read_old block %d for r-m-w\n", in handle_stripe_dirtying()
4180 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4181 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4182 s->locked++; in handle_stripe_dirtying()
4184 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4188 if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) { in handle_stripe_dirtying()
4192 for (i = disks; i--; ) { in handle_stripe_dirtying()
4193 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4194 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4195 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4196 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4197 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4198 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4200 if (test_bit(R5_Insync, &dev->flags) && in handle_stripe_dirtying()
4202 &sh->state)) { in handle_stripe_dirtying()
4205 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4206 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4207 s->locked++; in handle_stripe_dirtying()
4210 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4213 if (rcw && conf->mddev->queue) in handle_stripe_dirtying()
4214 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", in handle_stripe_dirtying()
4215 (unsigned long long)sh->sector, in handle_stripe_dirtying()
4216 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); in handle_stripe_dirtying()
4220 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe_dirtying()
4221 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4233 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && in handle_stripe_dirtying()
4234 (s->locked == 0 && (rcw == 0 || rmw == 0) && in handle_stripe_dirtying()
4235 !test_bit(STRIPE_BIT_DELAY, &sh->state))) in handle_stripe_dirtying()
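A toy cost comparison behind the rmw/rcw counting above, for full-block writes to w of the data_disks data blocks of one raid5 stripe with nothing already cached: read-modify-write has to read the old copies of those w blocks plus the old parity, while reconstruct-write has to read the data blocks that are not being rewritten. rmw_vs_rcw() and the fixed numbers are illustrative; in the code above, ties between the two counts are settled by conf->rmw_level.

#include <stdio.h>

static void rmw_vs_rcw(int data_disks, int w)
{
	int rmw = w + 1;		/* old data being rewritten + old parity */
	int rcw = data_disks - w;	/* data blocks not being rewritten */

	printf("write %d/%d blocks: rmw=%d rcw=%d -> %s\n",
	       w, data_disks, rmw, rcw,
	       rmw < rcw ? "read-modify-write" : "reconstruct-write");
}

int main(void)
{
	rmw_vs_rcw(4, 1);	/* rmw=2 rcw=3 -> read-modify-write */
	rmw_vs_rcw(4, 3);	/* rmw=4 rcw=1 -> reconstruct-write */
	return 0;
}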
4240 static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks5() argument
4245 BUG_ON(sh->batch_head); in handle_parity_checks5()
4246 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks5()
4248 switch (sh->check_state) { in handle_parity_checks5()
4251 if (s->failed == 0) { in handle_parity_checks5()
4252 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4253 sh->check_state = check_state_run; in handle_parity_checks5()
4254 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks5()
4255 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4256 s->uptodate--; in handle_parity_checks5()
4259 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks5()
4262 sh->check_state = check_state_idle; in handle_parity_checks5()
4264 dev = &sh->dev[sh->pd_idx]; in handle_parity_checks5()
4267 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks5()
4271 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); in handle_parity_checks5()
4272 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4274 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks5()
4275 s->locked++; in handle_parity_checks5()
4276 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks5()
4278 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks5()
4279 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4284 sh->check_state = check_state_idle; in handle_parity_checks5()
4289 if (s->failed) in handle_parity_checks5()
4296 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) in handle_parity_checks5()
4300 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4302 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks5()
4303 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks5()
4305 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4307 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks5()
4308 (unsigned long long) sh->sector, in handle_parity_checks5()
4309 (unsigned long long) sh->sector + in handle_parity_checks5()
4310 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks5()
4312 sh->check_state = check_state_compute_run; in handle_parity_checks5()
4313 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks5()
4314 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks5()
4316 &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4317 sh->ops.target = sh->pd_idx; in handle_parity_checks5()
4318 sh->ops.target2 = -1; in handle_parity_checks5()
4319 s->uptodate++; in handle_parity_checks5()
4327 __func__, sh->check_state, in handle_parity_checks5()
4328 (unsigned long long) sh->sector); in handle_parity_checks5()
4333 static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks6() argument
4337 int pd_idx = sh->pd_idx; in handle_parity_checks6()
4338 int qd_idx = sh->qd_idx; in handle_parity_checks6()
4341 BUG_ON(sh->batch_head); in handle_parity_checks6()
4342 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks6()
4344 BUG_ON(s->failed > 2); in handle_parity_checks6()
4352 switch (sh->check_state) { in handle_parity_checks6()
4355 if (s->failed == s->q_failed) { in handle_parity_checks6()
4360 sh->check_state = check_state_run; in handle_parity_checks6()
4362 if (!s->q_failed && s->failed < 2) { in handle_parity_checks6()
4366 if (sh->check_state == check_state_run) in handle_parity_checks6()
4367 sh->check_state = check_state_run_pq; in handle_parity_checks6()
4369 sh->check_state = check_state_run_q; in handle_parity_checks6()
4373 sh->ops.zero_sum_result = 0; in handle_parity_checks6()
4375 if (sh->check_state == check_state_run) { in handle_parity_checks6()
4377 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in handle_parity_checks6()
4378 s->uptodate--; in handle_parity_checks6()
4380 if (sh->check_state >= check_state_run && in handle_parity_checks6()
4381 sh->check_state <= check_state_run_pq) { in handle_parity_checks6()
4385 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks6()
4389 /* we have 2-disk failure */ in handle_parity_checks6()
4390 BUG_ON(s->failed != 2); in handle_parity_checks6()
4393 sh->check_state = check_state_idle; in handle_parity_checks6()
4396 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks6()
4403 if (s->failed == 2) { in handle_parity_checks6()
4404 dev = &sh->dev[s->failed_num[1]]; in handle_parity_checks6()
4405 s->locked++; in handle_parity_checks6()
4406 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4407 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4409 if (s->failed >= 1) { in handle_parity_checks6()
4410 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks6()
4411 s->locked++; in handle_parity_checks6()
4412 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4413 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4415 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4416 dev = &sh->dev[pd_idx]; in handle_parity_checks6()
4417 s->locked++; in handle_parity_checks6()
4418 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4419 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4421 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4422 dev = &sh->dev[qd_idx]; in handle_parity_checks6()
4423 s->locked++; in handle_parity_checks6()
4424 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4425 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4427 if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), in handle_parity_checks6()
4429 mdname(conf->mddev), in handle_parity_checks6()
4430 dev - (struct r5dev *) &sh->dev)) { in handle_parity_checks6()
4431 clear_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4432 clear_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4433 s->locked--; in handle_parity_checks6()
4435 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks6()
4437 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4444 sh->check_state = check_state_idle; in handle_parity_checks6()
4450 if (sh->ops.zero_sum_result == 0) { in handle_parity_checks6()
4452 if (!s->failed) in handle_parity_checks6()
4453 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4459 sh->check_state = check_state_compute_result; in handle_parity_checks6()
4467 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks6()
4468 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks6()
4470 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4472 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks6()
4473 (unsigned long long) sh->sector, in handle_parity_checks6()
4474 (unsigned long long) sh->sector + in handle_parity_checks6()
4475 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks6()
4477 int *target = &sh->ops.target; in handle_parity_checks6()
4479 sh->ops.target = -1; in handle_parity_checks6()
4480 sh->ops.target2 = -1; in handle_parity_checks6()
4481 sh->check_state = check_state_compute_run; in handle_parity_checks6()
4482 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks6()
4483 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks6()
4484 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4486 &sh->dev[pd_idx].flags); in handle_parity_checks6()
4488 target = &sh->ops.target2; in handle_parity_checks6()
4489 s->uptodate++; in handle_parity_checks6()
4491 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4493 &sh->dev[qd_idx].flags); in handle_parity_checks6()
4495 s->uptodate++; in handle_parity_checks6()
4504 __func__, sh->check_state, in handle_parity_checks6()
4505 (unsigned long long) sh->sector); in handle_parity_checks6()
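The zero_sum_result handling above decides what to do with the outcome of a RAID-6 parity check: if neither SUM_CHECK_P_RESULT nor SUM_CHECK_Q_RESULT is set the stripe is in sync; otherwise the flagged block(s) are marked for recompute and rewrite (or, during a plain check run, only resync_mismatches is bumped). A minimal user-space sketch of that classification; classify_check() and the literal flag values are stand-ins for the real enum in <linux/async_tx.h>:

#include <stdio.h>

/* Stand-ins for SUM_CHECK_P_RESULT / SUM_CHECK_Q_RESULT from
 * <linux/async_tx.h> (values assumed here so the sketch is standalone). */
#define SUM_CHECK_P_RESULT (1 << 0)
#define SUM_CHECK_Q_RESULT (1 << 1)

/* Classify a RAID-6 zero-sum check the way handle_parity_checks6() does:
 * decide which of P and Q need to be recomputed and written back. */
static void classify_check(unsigned int zero_sum_result)
{
	if (zero_sum_result == 0) {
		printf("P and Q both verify: stripe is in sync\n");
		return;
	}
	if (zero_sum_result & SUM_CHECK_P_RESULT)
		printf("P mismatch: recompute and rewrite the parity block\n");
	if (zero_sum_result & SUM_CHECK_Q_RESULT)
		printf("Q mismatch: recompute and rewrite the syndrome block\n");
}

int main(void)
{
	classify_check(0);
	classify_check(SUM_CHECK_P_RESULT);
	classify_check(SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT);
	return 0;
}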
4510 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) in handle_stripe_expansion() argument
4517 struct dma_async_tx_descriptor *tx = NULL; in handle_stripe_expansion() local
4518 BUG_ON(sh->batch_head); in handle_stripe_expansion()
4519 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); in handle_stripe_expansion()
4520 for (i = 0; i < sh->disks; i++) in handle_stripe_expansion()
4521 if (i != sh->pd_idx && i != sh->qd_idx) { in handle_stripe_expansion()
4527 sector_t s = raid5_compute_sector(conf, bn, 0, in handle_stripe_expansion()
4529 sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1); in handle_stripe_expansion()
4536 if (!test_bit(STRIPE_EXPANDING, &sh2->state) || in handle_stripe_expansion()
4537 test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { in handle_stripe_expansion()
4544 init_async_submit(&submit, 0, tx, NULL, NULL, NULL); in handle_stripe_expansion()
4545 tx = async_memcpy(sh2->dev[dd_idx].page, in handle_stripe_expansion()
4546 sh->dev[i].page, sh2->dev[dd_idx].offset, in handle_stripe_expansion()
4547 sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), in handle_stripe_expansion()
4550 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4551 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4552 for (j = 0; j < conf->raid_disks; j++) in handle_stripe_expansion()
4553 if (j != sh2->pd_idx && in handle_stripe_expansion()
4554 j != sh2->qd_idx && in handle_stripe_expansion()
4555 !test_bit(R5_Expanded, &sh2->dev[j].flags)) in handle_stripe_expansion()
4557 if (j == conf->raid_disks) { in handle_stripe_expansion()
4558 set_bit(STRIPE_EXPAND_READY, &sh2->state); in handle_stripe_expansion()
4559 set_bit(STRIPE_HANDLE, &sh2->state); in handle_stripe_expansion()
4565 async_tx_quiesce(&tx); in handle_stripe_expansion()
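handle_stripe_expansion() above copies each data block of the source stripe into its destination stripe in the post-reshape geometry and only marks a destination EXPAND_READY once every data slot (all indices except pd_idx/qd_idx) carries R5_Expanded. A standalone sketch of that completeness test; expand_ready() and the bitmask representation are illustrative stand-ins for the kernel's per-device flag words:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the "is the destination stripe fully populated?" test at the
 * end of handle_stripe_expansion(): every slot except the P and Q slots
 * must have received its copied block (R5_Expanded in the kernel, a plain
 * bitmask here). */
static bool expand_ready(unsigned long expanded_mask, int raid_disks,
			 int pd_idx, int qd_idx)
{
	for (int j = 0; j < raid_disks; j++) {
		if (j == pd_idx || j == qd_idx)
			continue;
		if (!(expanded_mask & (1UL << j)))
			return false;	/* at least one data block still missing */
	}
	return true;			/* mark STRIPE_EXPAND_READY / STRIPE_HANDLE */
}

int main(void)
{
	/* 6-disk array, P on slot 4, Q on slot 5: data slots are 0..3. */
	printf("%d\n", expand_ready(0x0F, 6, 4, 5));	/* 1: all data copied */
	printf("%d\n", expand_ready(0x07, 6, 4, 5));	/* 0: slot 3 missing  */
	return 0;
}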
4569 * handle_stripe - do things to a stripe.
4584 struct r5conf *conf = sh->raid_conf; in analyse_stripe() local
4585 int disks = sh->disks; in analyse_stripe()
4592 s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head; in analyse_stripe()
4593 s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head; in analyse_stripe()
4594 s->failed_num[0] = -1; in analyse_stripe()
4595 s->failed_num[1] = -1; in analyse_stripe()
4596 s->log_failed = r5l_log_disk_error(conf); in analyse_stripe()
4600 for (i=disks; i--; ) { in analyse_stripe()
4606 dev = &sh->dev[i]; in analyse_stripe()
4609 i, dev->flags, in analyse_stripe()
4610 dev->toread, dev->towrite, dev->written); in analyse_stripe()
4616 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && in analyse_stripe()
4617 !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) in analyse_stripe()
4618 set_bit(R5_Wantfill, &dev->flags); in analyse_stripe()
4621 if (test_bit(R5_LOCKED, &dev->flags)) in analyse_stripe()
4622 s->locked++; in analyse_stripe()
4623 if (test_bit(R5_UPTODATE, &dev->flags)) in analyse_stripe()
4624 s->uptodate++; in analyse_stripe()
4625 if (test_bit(R5_Wantcompute, &dev->flags)) { in analyse_stripe()
4626 s->compute++; in analyse_stripe()
4627 BUG_ON(s->compute > 2); in analyse_stripe()
4630 if (test_bit(R5_Wantfill, &dev->flags)) in analyse_stripe()
4631 s->to_fill++; in analyse_stripe()
4632 else if (dev->toread) in analyse_stripe()
4633 s->to_read++; in analyse_stripe()
4634 if (dev->towrite) { in analyse_stripe()
4635 s->to_write++; in analyse_stripe()
4636 if (!test_bit(R5_OVERWRITE, &dev->flags)) in analyse_stripe()
4637 s->non_overwrite++; in analyse_stripe()
4639 if (dev->written) in analyse_stripe()
4640 s->written++; in analyse_stripe()
4644 rdev = rcu_dereference(conf->disks[i].replacement); in analyse_stripe()
4645 if (rdev && !test_bit(Faulty, &rdev->flags) && in analyse_stripe()
4646 rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && in analyse_stripe()
4647 !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4649 set_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4651 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4652 set_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4654 clear_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4655 rdev = rcu_dereference(conf->disks[i].rdev); in analyse_stripe()
4656 clear_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4658 if (rdev && test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4661 is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4663 if (s->blocked_rdev == NULL in analyse_stripe()
4664 && (test_bit(Blocked, &rdev->flags) in analyse_stripe()
4668 &rdev->flags); in analyse_stripe()
4669 s->blocked_rdev = rdev; in analyse_stripe()
4670 atomic_inc(&rdev->nr_pending); in analyse_stripe()
4673 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4675 /* Not in-sync */; in analyse_stripe()
4677 /* also not in-sync */ in analyse_stripe()
4678 if (!test_bit(WriteErrorSeen, &rdev->flags) && in analyse_stripe()
4679 test_bit(R5_UPTODATE, &dev->flags)) { in analyse_stripe()
4680 /* treat as in-sync, but with a read error in analyse_stripe()
4683 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4684 set_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4686 } else if (test_bit(In_sync, &rdev->flags)) in analyse_stripe()
4687 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4688 else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) in analyse_stripe()
4690 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4691 else if (test_bit(R5_UPTODATE, &dev->flags) && in analyse_stripe()
4692 test_bit(R5_Expanded, &dev->flags)) in analyse_stripe()
4697 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4699 if (test_bit(R5_WriteError, &dev->flags)) { in analyse_stripe()
4703 conf->disks[i].rdev); in analyse_stripe()
4705 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4706 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4707 s->handle_bad_blocks = 1; in analyse_stripe()
4708 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4710 clear_bit(R5_WriteError, &dev->flags); in analyse_stripe()
4712 if (test_bit(R5_MadeGood, &dev->flags)) { in analyse_stripe()
4716 conf->disks[i].rdev); in analyse_stripe()
4717 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4718 s->handle_bad_blocks = 1; in analyse_stripe()
4719 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4721 clear_bit(R5_MadeGood, &dev->flags); in analyse_stripe()
4723 if (test_bit(R5_MadeGoodRepl, &dev->flags)) { in analyse_stripe()
4725 conf->disks[i].replacement); in analyse_stripe()
4726 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4727 s->handle_bad_blocks = 1; in analyse_stripe()
4728 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4730 clear_bit(R5_MadeGoodRepl, &dev->flags); in analyse_stripe()
4732 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4734 clear_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4735 clear_bit(R5_ReWrite, &dev->flags); in analyse_stripe()
4737 if (test_bit(R5_ReadError, &dev->flags)) in analyse_stripe()
4738 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4739 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4740 if (s->failed < 2) in analyse_stripe()
4741 s->failed_num[s->failed] = i; in analyse_stripe()
4742 s->failed++; in analyse_stripe()
4743 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4747 conf->disks[i].replacement); in analyse_stripe()
4748 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4753 if (test_bit(R5_InJournal, &dev->flags)) in analyse_stripe()
4754 s->injournal++; in analyse_stripe()
4755 if (test_bit(R5_InJournal, &dev->flags) && dev->written) in analyse_stripe()
4756 s->just_cached++; in analyse_stripe()
4758 if (test_bit(STRIPE_SYNCING, &sh->state)) { in analyse_stripe()
4768 sh->sector >= conf->mddev->recovery_cp || in analyse_stripe()
4769 test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) in analyse_stripe()
4770 s->syncing = 1; in analyse_stripe()
4772 s->replacing = 1; in analyse_stripe()
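analyse_stripe() reduces the per-device flags into the counters the rest of handle_stripe() branches on: locked, uptodate, compute, to_read, to_write, and failed, with failed_num[] remembering up to two failed slots. A compressed standalone sketch of that accounting pass; the dev_state/stripe_state types and the analyse() helper are simplifications of struct r5dev and struct stripe_head_state:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the per-device state analyse_stripe() inspects. */
struct dev_state {
	bool locked, uptodate, wantcompute, toread, towrite, insync;
};

/* Mirrors the counters gathered into struct stripe_head_state. */
struct stripe_state {
	int locked, uptodate, compute, to_read, to_write, failed;
	int failed_num[2];
};

static void analyse(const struct dev_state *dev, int disks,
		    struct stripe_state *s)
{
	*s = (struct stripe_state){ .failed_num = { -1, -1 } };
	for (int i = disks; i--; ) {
		if (dev[i].locked)      s->locked++;
		if (dev[i].uptodate)    s->uptodate++;
		if (dev[i].wantcompute) s->compute++;
		if (dev[i].toread)      s->to_read++;
		if (dev[i].towrite)     s->to_write++;
		if (!dev[i].insync) {
			if (s->failed < 2)
				s->failed_num[s->failed] = i;
			s->failed++;
		}
	}
}

int main(void)
{
	struct dev_state dev[4] = {
		{ .uptodate = true, .insync = true },
		{ .uptodate = true, .insync = true, .towrite = true },
		{ .insync = false },			/* failed device */
		{ .uptodate = true, .insync = true },
	};
	struct stripe_state s;

	analyse(dev, 4, &s);
	printf("uptodate=%d to_write=%d failed=%d first_failed=%d\n",
	       s.uptodate, s.to_write, s.failed, s.failed_num[0]);
	return 0;
}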
4784 if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) in clear_batch_ready()
4785 return (sh->batch_head && sh->batch_head != sh); in clear_batch_ready()
4786 spin_lock(&sh->stripe_lock); in clear_batch_ready()
4787 if (!sh->batch_head) { in clear_batch_ready()
4788 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4796 if (sh->batch_head != sh) { in clear_batch_ready()
4797 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4800 spin_lock(&sh->batch_lock); in clear_batch_ready()
4801 list_for_each_entry(tmp, &sh->batch_list, batch_list) in clear_batch_ready()
4802 clear_bit(STRIPE_BATCH_READY, &tmp->state); in clear_batch_ready()
4803 spin_unlock(&sh->batch_lock); in clear_batch_ready()
4804 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4820 list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { in break_stripe_batch_list()
4822 list_del_init(&sh->batch_list); in break_stripe_batch_list()
4824 WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | in break_stripe_batch_list()
4836 "stripe state: %lx\n", sh->state); in break_stripe_batch_list()
4837 WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | in break_stripe_batch_list()
4839 "head stripe state: %lx\n", head_sh->state); in break_stripe_batch_list()
4841 set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | in break_stripe_batch_list()
4845 head_sh->state & (1 << STRIPE_INSYNC)); in break_stripe_batch_list()
4847 sh->check_state = head_sh->check_state; in break_stripe_batch_list()
4848 sh->reconstruct_state = head_sh->reconstruct_state; in break_stripe_batch_list()
4849 spin_lock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4850 sh->batch_head = NULL; in break_stripe_batch_list()
4851 spin_unlock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4852 for (i = 0; i < sh->disks; i++) { in break_stripe_batch_list()
4853 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in break_stripe_batch_list()
4855 sh->dev[i].flags = head_sh->dev[i].flags & in break_stripe_batch_list()
4859 sh->state & handle_flags) in break_stripe_batch_list()
4860 set_bit(STRIPE_HANDLE, &sh->state); in break_stripe_batch_list()
4863 spin_lock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4864 head_sh->batch_head = NULL; in break_stripe_batch_list()
4865 spin_unlock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4866 for (i = 0; i < head_sh->disks; i++) in break_stripe_batch_list()
4867 if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) in break_stripe_batch_list()
4869 if (head_sh->state & handle_flags) in break_stripe_batch_list()
4870 set_bit(STRIPE_HANDLE, &head_sh->state); in break_stripe_batch_list()
4873 wake_up(&head_sh->raid_conf->wait_for_overlap); in break_stripe_batch_list()
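break_stripe_batch_list() above walks the head stripe's batch_list, unlinks every member, clears its batch_head, and flags it so it gets handled on its own. A toy model of that pattern on a plain singly linked list; struct stripe, break_batch() and the needs_handling flag are illustrative, not the kernel's types:

#include <stdio.h>
#include <stddef.h>

/* Toy model of breaking up a stripe batch: each member is unlinked from the
 * head's batch list, loses its batch_head pointer, and is flagged so it gets
 * handled individually (STRIPE_HANDLE in the kernel, a plain flag here). */
struct stripe {
	struct stripe *next_in_batch;	/* stands in for the batch_list entry */
	struct stripe *batch_head;
	int needs_handling;
	int id;
};

static void break_batch(struct stripe *head)
{
	struct stripe *sh = head->next_in_batch;

	while (sh) {
		struct stripe *next = sh->next_in_batch;

		sh->next_in_batch = NULL;
		sh->batch_head = NULL;		/* no longer part of a batch */
		sh->needs_handling = 1;		/* handle this stripe on its own */
		printf("stripe %d released from batch\n", sh->id);
		sh = next;
	}
	head->next_in_batch = NULL;
	head->batch_head = NULL;
}

int main(void)
{
	struct stripe a = { .id = 1 }, b = { .id = 2 }, head = { .id = 0 };

	head.next_in_batch = &a;
	a.next_in_batch = &b;
	a.batch_head = b.batch_head = &head;
	break_batch(&head);
	return 0;
}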
4879 struct r5conf *conf = sh->raid_conf; in handle_stripe() local
4882 int disks = sh->disks; in handle_stripe()
4885 clear_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
4896 if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { in handle_stripe()
4899 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
4903 if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) in handle_stripe()
4906 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { in handle_stripe()
4907 spin_lock(&sh->stripe_lock); in handle_stripe()
4912 if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && in handle_stripe()
4913 !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && in handle_stripe()
4914 !test_bit(STRIPE_DISCARD, &sh->state) && in handle_stripe()
4915 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { in handle_stripe()
4916 set_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
4917 clear_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
4918 clear_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
4920 spin_unlock(&sh->stripe_lock); in handle_stripe()
4922 clear_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
4926 (unsigned long long)sh->sector, sh->state, in handle_stripe()
4927 atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, in handle_stripe()
4928 sh->check_state, sh->reconstruct_state); in handle_stripe()
4932 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) in handle_stripe()
4936 test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { in handle_stripe()
4937 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
4944 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
4948 rdev_dec_pending(s.blocked_rdev, conf->mddev); in handle_stripe()
4952 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { in handle_stripe()
4954 set_bit(STRIPE_BIOFILL_RUN, &sh->state); in handle_stripe()
4968 if (s.failed > conf->max_degraded || in handle_stripe()
4970 sh->check_state = 0; in handle_stripe()
4971 sh->reconstruct_state = 0; in handle_stripe()
4974 handle_failed_stripe(conf, sh, &s, disks); in handle_stripe()
4976 handle_failed_sync(conf, sh, &s); in handle_stripe()
4983 if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) in handle_stripe()
4985 if (sh->reconstruct_state == reconstruct_state_drain_result || in handle_stripe()
4986 sh->reconstruct_state == reconstruct_state_prexor_drain_result) { in handle_stripe()
4987 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
4992 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) && in handle_stripe()
4993 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)); in handle_stripe()
4994 BUG_ON(sh->qd_idx >= 0 && in handle_stripe()
4995 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) && in handle_stripe()
4996 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags)); in handle_stripe()
4997 for (i = disks; i--; ) { in handle_stripe()
4998 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
4999 if (test_bit(R5_LOCKED, &dev->flags) && in handle_stripe()
5000 (i == sh->pd_idx || i == sh->qd_idx || in handle_stripe()
5001 dev->written || test_bit(R5_InJournal, in handle_stripe()
5002 &dev->flags))) { in handle_stripe()
5004 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5009 if (!test_bit(R5_Insync, &dev->flags) || in handle_stripe()
5010 ((i == sh->pd_idx || i == sh->qd_idx) && in handle_stripe()
5012 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5015 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe()
5023 pdev = &sh->dev[sh->pd_idx]; in handle_stripe()
5024 s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) in handle_stripe()
5025 || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); in handle_stripe()
5026 qdev = &sh->dev[sh->qd_idx]; in handle_stripe()
5027 s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) in handle_stripe()
5028 || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) in handle_stripe()
5029 || conf->level < 6; in handle_stripe()
5032 (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) in handle_stripe()
5033 && !test_bit(R5_LOCKED, &pdev->flags) in handle_stripe()
5034 && (test_bit(R5_UPTODATE, &pdev->flags) || in handle_stripe()
5035 test_bit(R5_Discard, &pdev->flags))))) && in handle_stripe()
5036 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) in handle_stripe()
5037 && !test_bit(R5_LOCKED, &qdev->flags) in handle_stripe()
5038 && (test_bit(R5_UPTODATE, &qdev->flags) || in handle_stripe()
5039 test_bit(R5_Discard, &qdev->flags)))))) in handle_stripe()
5040 handle_stripe_clean_event(conf, sh, disks); in handle_stripe()
5043 r5c_handle_cached_data_endio(conf, sh, disks); in handle_stripe()
5062 r5c_finish_stripe_write_out(conf, sh, &s); in handle_stripe()
5073 if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) { in handle_stripe()
5074 if (!r5c_is_writeback(conf->log)) { in handle_stripe()
5076 handle_stripe_dirtying(conf, sh, &s, disks); in handle_stripe()
5082 ret = r5c_try_caching_write(conf, sh, &s, in handle_stripe()
5085 * If caching phase failed: ret == -EAGAIN in handle_stripe()
5091 if (ret == -EAGAIN || in handle_stripe()
5093 (!test_bit(STRIPE_R5C_CACHING, &sh->state) && in handle_stripe()
5095 ret = handle_stripe_dirtying(conf, sh, &s, in handle_stripe()
5097 if (ret == -EAGAIN) in handle_stripe()
5108 if (sh->check_state || in handle_stripe()
5110 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5111 !test_bit(STRIPE_INSYNC, &sh->state))) { in handle_stripe()
5112 if (conf->level == 6) in handle_stripe()
5113 handle_parity_checks6(conf, sh, &s, disks); in handle_stripe()
5115 handle_parity_checks5(conf, sh, &s, disks); in handle_stripe()
5119 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) in handle_stripe()
5120 && !test_bit(STRIPE_REPLACED, &sh->state)) { in handle_stripe()
5122 for (i = 0; i < conf->raid_disks; i++) in handle_stripe()
5123 if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { in handle_stripe()
5124 WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_stripe()
5125 set_bit(R5_WantReplace, &sh->dev[i].flags); in handle_stripe()
5126 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5130 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5131 set_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
5134 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5135 test_bit(STRIPE_INSYNC, &sh->state)) { in handle_stripe()
5136 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5137 clear_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
5138 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_stripe()
5139 wake_up(&conf->wait_for_overlap); in handle_stripe()
5145 if (s.failed <= conf->max_degraded && !conf->mddev->ro) in handle_stripe()
5147 struct r5dev *dev = &sh->dev[s.failed_num[i]]; in handle_stripe()
5148 if (test_bit(R5_ReadError, &dev->flags) in handle_stripe()
5149 && !test_bit(R5_LOCKED, &dev->flags) in handle_stripe()
5150 && test_bit(R5_UPTODATE, &dev->flags) in handle_stripe()
5152 if (!test_bit(R5_ReWrite, &dev->flags)) { in handle_stripe()
5153 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5154 set_bit(R5_ReWrite, &dev->flags); in handle_stripe()
5157 set_bit(R5_Wantread, &dev->flags); in handle_stripe()
5158 set_bit(R5_LOCKED, &dev->flags); in handle_stripe()
5164 if (sh->reconstruct_state == reconstruct_state_result) { in handle_stripe()
5166 = raid5_get_active_stripe(conf, sh->sector, 1, 1, 1); in handle_stripe()
5167 if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { in handle_stripe()
5171 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
5172 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5174 &sh_src->state)) in handle_stripe()
5175 atomic_inc(&conf->preread_active_stripes); in handle_stripe()
5182 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
5183 clear_bit(STRIPE_EXPANDING, &sh->state); in handle_stripe()
5184 for (i = conf->raid_disks; i--; ) { in handle_stripe()
5185 set_bit(R5_Wantwrite, &sh->dev[i].flags); in handle_stripe()
5186 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5191 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && in handle_stripe()
5192 !sh->reconstruct_state) { in handle_stripe()
5194 sh->disks = conf->raid_disks; in handle_stripe()
5195 stripe_set_idx(sh->sector, conf, 0, sh); in handle_stripe()
5197 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { in handle_stripe()
5198 clear_bit(STRIPE_EXPAND_READY, &sh->state); in handle_stripe()
5199 atomic_dec(&conf->reshape_stripes); in handle_stripe()
5200 wake_up(&conf->wait_for_overlap); in handle_stripe()
5201 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5205 !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) in handle_stripe()
5206 handle_stripe_expansion(conf, sh); in handle_stripe()
5211 if (conf->mddev->external) in handle_stripe()
5213 conf->mddev); in handle_stripe()
5220 conf->mddev); in handle_stripe()
5224 for (i = disks; i--; ) { in handle_stripe()
5226 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
5227 if (test_and_clear_bit(R5_WriteError, &dev->flags)) { in handle_stripe()
5229 rdev = conf->disks[i].rdev; in handle_stripe()
5230 if (!rdev_set_badblocks(rdev, sh->sector, in handle_stripe()
5231 RAID5_STRIPE_SECTORS(conf), 0)) in handle_stripe()
5232 md_error(conf->mddev, rdev); in handle_stripe()
5233 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5235 if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { in handle_stripe()
5236 rdev = conf->disks[i].rdev; in handle_stripe()
5237 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5238 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5239 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5241 if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { in handle_stripe()
5242 rdev = conf->disks[i].replacement; in handle_stripe()
5245 rdev = conf->disks[i].rdev; in handle_stripe()
5246 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5247 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5248 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5262 atomic_dec(&conf->preread_active_stripes); in handle_stripe()
5263 if (atomic_read(&conf->preread_active_stripes) < in handle_stripe()
5265 md_wakeup_thread(conf->mddev->thread); in handle_stripe()
5268 clear_bit_unlock(STRIPE_ACTIVE, &sh->state); in handle_stripe()
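The gate ahead of handle_stripe_clean_event() in the excerpt above only returns completed writes once each parity device is "settled": either it already counts as failed, or it is in-sync, not locked, and up to date (or a discard); for levels below 6 the Q-device test is satisfied trivially. A boolean sketch of that predicate; parity_dev_settled()/can_complete_writes() are illustrative names:

#include <stdbool.h>
#include <stdio.h>

struct pdev_state {
	bool insync, locked, uptodate, discard;
};

/* One parity device is "safe" for completing writes if it failed (nothing to
 * wait for) or it is in-sync, not locked, and carries valid data/discard. */
static bool parity_dev_settled(bool dev_failed, const struct pdev_state *d)
{
	return dev_failed ||
	       (d->insync && !d->locked && (d->uptodate || d->discard));
}

/* Mirror of the gate in handle_stripe(): both P and Q must be settled,
 * where "Q settled" is trivially true below RAID-6 (level < 6). */
static bool can_complete_writes(int level, bool p_failed, bool q_failed,
				const struct pdev_state *p,
				const struct pdev_state *q)
{
	return parity_dev_settled(p_failed, p) &&
	       parity_dev_settled(q_failed || level < 6, q);
}

int main(void)
{
	struct pdev_state p = { .insync = true, .uptodate = true };
	struct pdev_state q = { .locked = true };	/* Q still being written */

	printf("raid5: %d\n", can_complete_writes(5, false, false, &p, &q));
	printf("raid6: %d\n", can_complete_writes(6, false, false, &p, &q));
	return 0;
}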
5271 static void raid5_activate_delayed(struct r5conf *conf) in raid5_activate_delayed() argument
5273 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { in raid5_activate_delayed()
5274 while (!list_empty(&conf->delayed_list)) { in raid5_activate_delayed()
5275 struct list_head *l = conf->delayed_list.next; in raid5_activate_delayed()
5279 clear_bit(STRIPE_DELAYED, &sh->state); in raid5_activate_delayed()
5280 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in raid5_activate_delayed()
5281 atomic_inc(&conf->preread_active_stripes); in raid5_activate_delayed()
5282 list_add_tail(&sh->lru, &conf->hold_list); in raid5_activate_delayed()
5288 static void activate_bit_delay(struct r5conf *conf, in activate_bit_delay() argument
5293 list_add(&head, &conf->bitmap_list); in activate_bit_delay()
5294 list_del_init(&conf->bitmap_list); in activate_bit_delay()
5298 list_del_init(&sh->lru); in activate_bit_delay()
5299 atomic_inc(&sh->count); in activate_bit_delay()
5300 hash = sh->hash_lock_index; in activate_bit_delay()
5301 __release_stripe(conf, sh, &temp_inactive_list[hash]); in activate_bit_delay()
5307 struct r5conf *conf = mddev->private; in in_chunk_boundary() local
5308 sector_t sector = bio->bi_iter.bi_sector; in in_chunk_boundary()
5312 WARN_ON_ONCE(bio->bi_partno); in in_chunk_boundary()
5314 chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); in in_chunk_boundary()
5316 ((sector & (chunk_sectors - 1)) + bio_sectors); in in_chunk_boundary()
5323 static void add_bio_to_retry(struct bio *bi,struct r5conf *conf) in add_bio_to_retry() argument
5327 spin_lock_irqsave(&conf->device_lock, flags); in add_bio_to_retry()
5329 bi->bi_next = conf->retry_read_aligned_list; in add_bio_to_retry()
5330 conf->retry_read_aligned_list = bi; in add_bio_to_retry()
5332 spin_unlock_irqrestore(&conf->device_lock, flags); in add_bio_to_retry()
5333 md_wakeup_thread(conf->mddev->thread); in add_bio_to_retry()
5336 static struct bio *remove_bio_from_retry(struct r5conf *conf, in remove_bio_from_retry() argument
5341 bi = conf->retry_read_aligned; in remove_bio_from_retry()
5343 *offset = conf->retry_read_offset; in remove_bio_from_retry()
5344 conf->retry_read_aligned = NULL; in remove_bio_from_retry()
5347 bi = conf->retry_read_aligned_list; in remove_bio_from_retry()
5349 conf->retry_read_aligned_list = bi->bi_next; in remove_bio_from_retry()
5350 bi->bi_next = NULL; in remove_bio_from_retry()
5365 struct bio* raid_bi = bi->bi_private; in raid5_align_endio()
5367 struct r5conf *conf; in raid5_align_endio() local
5369 blk_status_t error = bi->bi_status; in raid5_align_endio()
5373 rdev = (void*)raid_bi->bi_next; in raid5_align_endio()
5374 raid_bi->bi_next = NULL; in raid5_align_endio()
5375 mddev = rdev->mddev; in raid5_align_endio()
5376 conf = mddev->private; in raid5_align_endio()
5378 rdev_dec_pending(rdev, conf->mddev); in raid5_align_endio()
5382 if (atomic_dec_and_test(&conf->active_aligned_reads)) in raid5_align_endio()
5383 wake_up(&conf->wait_for_quiescent); in raid5_align_endio()
5389 add_bio_to_retry(raid_bi, conf); in raid5_align_endio()
5394 struct r5conf *conf = mddev->private; in raid5_read_one_chunk() local
5407 align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set); in raid5_read_one_chunk()
5414 align_bi->bi_end_io = raid5_align_endio; in raid5_read_one_chunk()
5415 align_bi->bi_private = raid_bio; in raid5_read_one_chunk()
5419 align_bi->bi_iter.bi_sector = in raid5_read_one_chunk()
5420 raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, in raid5_read_one_chunk()
5425 rdev = rcu_dereference(conf->disks[dd_idx].replacement); in raid5_read_one_chunk()
5426 if (!rdev || test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5427 rdev->recovery_offset < end_sector) { in raid5_read_one_chunk()
5428 rdev = rcu_dereference(conf->disks[dd_idx].rdev); in raid5_read_one_chunk()
5430 (test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5431 !(test_bit(In_sync, &rdev->flags) || in raid5_read_one_chunk()
5432 rdev->recovery_offset >= end_sector))) in raid5_read_one_chunk()
5436 if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) { in raid5_read_one_chunk()
5446 atomic_inc(&rdev->nr_pending); in raid5_read_one_chunk()
5448 raid_bio->bi_next = (void*)rdev; in raid5_read_one_chunk()
5449 bio_set_dev(align_bi, rdev->bdev); in raid5_read_one_chunk()
5451 if (is_badblock(rdev, align_bi->bi_iter.bi_sector, in raid5_read_one_chunk()
5459 /* No reshape active, so we can trust rdev->data_offset */ in raid5_read_one_chunk()
5460 align_bi->bi_iter.bi_sector += rdev->data_offset; in raid5_read_one_chunk()
5462 spin_lock_irq(&conf->device_lock); in raid5_read_one_chunk()
5463 wait_event_lock_irq(conf->wait_for_quiescent, in raid5_read_one_chunk()
5464 conf->quiesce == 0, in raid5_read_one_chunk()
5465 conf->device_lock); in raid5_read_one_chunk()
5466 atomic_inc(&conf->active_aligned_reads); in raid5_read_one_chunk()
5467 spin_unlock_irq(&conf->device_lock); in raid5_read_one_chunk()
5469 if (mddev->gendisk) in raid5_read_one_chunk()
5470 trace_block_bio_remap(align_bi->bi_disk->queue, in raid5_read_one_chunk()
5471 align_bi, disk_devt(mddev->gendisk), in raid5_read_one_chunk()
5472 raid_bio->bi_iter.bi_sector); in raid5_read_one_chunk()
5485 sector_t sector = raid_bio->bi_iter.bi_sector; in chunk_aligned_read()
5486 unsigned chunk_sects = mddev->chunk_sectors; in chunk_aligned_read()
5487 unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); in chunk_aligned_read()
5490 struct r5conf *conf = mddev->private; in chunk_aligned_read() local
5491 split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split); in chunk_aligned_read()
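chunk_aligned_read() only sends a bio down the single-drive fast path if it fits inside one chunk; otherwise it splits off the sectors up to the next chunk boundary, chunk_sects - (sector & (chunk_sects - 1)), and requeues the rest. A small sketch of that arithmetic, which relies on chunk_sects being a power of two:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Sectors from 'sector' up to the next chunk boundary; chunk_sects is a
 * power of two, so the offset inside the chunk is sector & (chunk_sects-1). */
static unsigned int sectors_to_chunk_boundary(sector_t sector,
					      unsigned int chunk_sects)
{
	return chunk_sects - (unsigned int)(sector & (chunk_sects - 1));
}

int main(void)
{
	unsigned int chunk_sects = 1024;		/* 512 KiB chunk */
	sector_t sector = 1000;
	unsigned int bio_sectors = 64;
	unsigned int split = sectors_to_chunk_boundary(sector, chunk_sects);

	if (bio_sectors <= split)
		printf("fits in one chunk, no split needed\n");
	else
		printf("split after %u sectors, retry the remaining %u\n",
		       split, bio_sectors - split);
	return 0;
}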
5503 /* __get_priority_stripe - get the next stripe to process
5513 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) in __get_priority_stripe() argument
5518 bool second_try = !r5c_is_writeback(conf->log) && in __get_priority_stripe()
5519 !r5l_log_disk_error(conf); in __get_priority_stripe()
5520 bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || in __get_priority_stripe()
5521 r5l_log_disk_error(conf); in __get_priority_stripe()
5526 if (conf->worker_cnt_per_group == 0) { in __get_priority_stripe()
5527 handle_list = try_loprio ? &conf->loprio_list : in __get_priority_stripe()
5528 &conf->handle_list; in __get_priority_stripe()
5530 handle_list = try_loprio ? &conf->worker_groups[group].loprio_list : in __get_priority_stripe()
5531 &conf->worker_groups[group].handle_list; in __get_priority_stripe()
5532 wg = &conf->worker_groups[group]; in __get_priority_stripe()
5535 for (i = 0; i < conf->group_cnt; i++) { in __get_priority_stripe()
5536 handle_list = try_loprio ? &conf->worker_groups[i].loprio_list : in __get_priority_stripe()
5537 &conf->worker_groups[i].handle_list; in __get_priority_stripe()
5538 wg = &conf->worker_groups[i]; in __get_priority_stripe()
5547 list_empty(&conf->hold_list) ? "empty" : "busy", in __get_priority_stripe()
5548 atomic_read(&conf->pending_full_writes), conf->bypass_count); in __get_priority_stripe()
5551 sh = list_entry(handle_list->next, typeof(*sh), lru); in __get_priority_stripe()
5553 if (list_empty(&conf->hold_list)) in __get_priority_stripe()
5554 conf->bypass_count = 0; in __get_priority_stripe()
5555 else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { in __get_priority_stripe()
5556 if (conf->hold_list.next == conf->last_hold) in __get_priority_stripe()
5557 conf->bypass_count++; in __get_priority_stripe()
5559 conf->last_hold = conf->hold_list.next; in __get_priority_stripe()
5560 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5561 if (conf->bypass_count < 0) in __get_priority_stripe()
5562 conf->bypass_count = 0; in __get_priority_stripe()
5565 } else if (!list_empty(&conf->hold_list) && in __get_priority_stripe()
5566 ((conf->bypass_threshold && in __get_priority_stripe()
5567 conf->bypass_count > conf->bypass_threshold) || in __get_priority_stripe()
5568 atomic_read(&conf->pending_full_writes) == 0)) { in __get_priority_stripe()
5570 list_for_each_entry(tmp, &conf->hold_list, lru) { in __get_priority_stripe()
5571 if (conf->worker_cnt_per_group == 0 || in __get_priority_stripe()
5573 !cpu_online(tmp->cpu) || in __get_priority_stripe()
5574 cpu_to_group(tmp->cpu) == group) { in __get_priority_stripe()
5581 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5582 if (conf->bypass_count < 0) in __get_priority_stripe()
5583 conf->bypass_count = 0; in __get_priority_stripe()
5597 wg->stripes_cnt--; in __get_priority_stripe()
5598 sh->group = NULL; in __get_priority_stripe()
5600 list_del_init(&sh->lru); in __get_priority_stripe()
5601 BUG_ON(atomic_inc_return(&sh->count) != 1); in __get_priority_stripe()
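__get_priority_stripe() normally serves handle_list/loprio_list but keeps bypass_count as a fairness counter for the hold_list of preread (partial-write) stripes: skipping hold_list raises it, and once it exceeds bypass_threshold (or no full-stripe writes are pending) a stripe is taken from hold_list instead and the counter is charged back. A condensed sketch of that arbitration; the last_hold/STRIPE_IO_STARTED refinements and the worker-group selection are left out:

#include <stdbool.h>
#include <stdio.h>

struct sched_state {
	int bypass_count;
	int bypass_threshold;
	bool handle_list_empty;
	bool hold_list_empty;
	int pending_full_writes;
};

/* Returns true if the next stripe should come from hold_list (the preread
 * stripes that have been waiting), false if it comes from handle_list. */
static bool pick_from_hold_list(struct sched_state *s)
{
	if (!s->handle_list_empty) {
		/* Served from handle_list: account one more bypass. */
		if (s->hold_list_empty)
			s->bypass_count = 0;
		else
			s->bypass_count++;
		return false;
	}
	if (!s->hold_list_empty &&
	    ((s->bypass_threshold && s->bypass_count > s->bypass_threshold) ||
	     s->pending_full_writes == 0)) {
		s->bypass_count -= s->bypass_threshold;
		if (s->bypass_count < 0)
			s->bypass_count = 0;
		return true;
	}
	return false;
}

int main(void)
{
	struct sched_state s = {
		.bypass_threshold = 1, .handle_list_empty = true,
		.hold_list_empty = false, .pending_full_writes = 0,
	};

	printf("serve hold_list: %d\n", pick_from_hold_list(&s));
	return 0;
}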
5616 struct mddev *mddev = cb->cb.data; in raid5_unplug()
5617 struct r5conf *conf = mddev->private; in raid5_unplug() local
5621 if (cb->list.next && !list_empty(&cb->list)) { in raid5_unplug()
5622 spin_lock_irq(&conf->device_lock); in raid5_unplug()
5623 while (!list_empty(&cb->list)) { in raid5_unplug()
5624 sh = list_first_entry(&cb->list, struct stripe_head, lru); in raid5_unplug()
5625 list_del_init(&sh->lru); in raid5_unplug()
5632 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); in raid5_unplug()
5637 hash = sh->hash_lock_index; in raid5_unplug()
5638 __release_stripe(conf, sh, &cb->temp_inactive_list[hash]); in raid5_unplug()
5641 spin_unlock_irq(&conf->device_lock); in raid5_unplug()
5643 release_inactive_stripe_list(conf, cb->temp_inactive_list, in raid5_unplug()
5645 if (mddev->queue) in raid5_unplug()
5646 trace_block_unplug(mddev->queue, cnt, !from_schedule); in raid5_unplug()
5665 if (cb->list.next == NULL) { in release_stripe_plug()
5667 INIT_LIST_HEAD(&cb->list); in release_stripe_plug()
5669 INIT_LIST_HEAD(cb->temp_inactive_list + i); in release_stripe_plug()
5672 if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) in release_stripe_plug()
5673 list_add_tail(&sh->lru, &cb->list); in release_stripe_plug()
5680 struct r5conf *conf = mddev->private; in make_discard_request() local
5685 if (mddev->reshape_position != MaxSector) in make_discard_request()
5689 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in make_discard_request()
5692 bi->bi_next = NULL; in make_discard_request()
5694 stripe_sectors = conf->chunk_sectors * in make_discard_request()
5695 (conf->raid_disks - conf->max_degraded); in make_discard_request()
5700 logical_sector *= conf->chunk_sectors; in make_discard_request()
5701 last_sector *= conf->chunk_sectors; in make_discard_request()
5704 logical_sector += RAID5_STRIPE_SECTORS(conf)) { in make_discard_request()
5708 sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0); in make_discard_request()
5709 prepare_to_wait(&conf->wait_for_overlap, &w, in make_discard_request()
5711 set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5712 if (test_bit(STRIPE_SYNCING, &sh->state)) { in make_discard_request()
5717 clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5718 spin_lock_irq(&sh->stripe_lock); in make_discard_request()
5719 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5720 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5722 if (sh->dev[d].towrite || sh->dev[d].toread) { in make_discard_request()
5723 set_bit(R5_Overlap, &sh->dev[d].flags); in make_discard_request()
5724 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5730 set_bit(STRIPE_DISCARD, &sh->state); in make_discard_request()
5731 finish_wait(&conf->wait_for_overlap, &w); in make_discard_request()
5732 sh->overwrite_disks = 0; in make_discard_request()
5733 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5734 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5736 sh->dev[d].towrite = bi; in make_discard_request()
5737 set_bit(R5_OVERWRITE, &sh->dev[d].flags); in make_discard_request()
5740 sh->overwrite_disks++; in make_discard_request()
5742 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5743 if (conf->mddev->bitmap) { in make_discard_request()
5745 d < conf->raid_disks - conf->max_degraded; in make_discard_request()
5747 md_bitmap_startwrite(mddev->bitmap, in make_discard_request()
5748 sh->sector, in make_discard_request()
5749 RAID5_STRIPE_SECTORS(conf), in make_discard_request()
5751 sh->bm_seq = conf->seq_flush + 1; in make_discard_request()
5752 set_bit(STRIPE_BIT_DELAY, &sh->state); in make_discard_request()
5755 set_bit(STRIPE_HANDLE, &sh->state); in make_discard_request()
5756 clear_bit(STRIPE_DELAYED, &sh->state); in make_discard_request()
5757 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in make_discard_request()
5758 atomic_inc(&conf->preread_active_stripes); in make_discard_request()
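make_discard_request() only trims whole stripes: one stripe spans chunk_sectors * (raid_disks - max_degraded) sectors of data, the start of the request is rounded up and the end rounded down to that granularity, and the resulting stripe indices are converted back to per-device sectors (one chunk per stripe on each member). A sketch of that rounding; discard_bounds() and the example geometry are illustrative:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Round the discard range to whole stripes: a stripe covers
 * chunk_sectors * data_disks sectors of user data.  The resulting stripe
 * indices are then converted back to per-device sectors (one chunk per
 * stripe on each device), which is what the stripe cache is indexed by. */
static void discard_bounds(sector_t start, sector_t end,
			   unsigned int chunk_sectors, int data_disks,
			   sector_t *dev_first, sector_t *dev_last)
{
	sector_t stripe_sectors = (sector_t)chunk_sectors * data_disks;
	sector_t first_stripe = (start + stripe_sectors - 1) / stripe_sectors;
	sector_t last_stripe = end / stripe_sectors;

	*dev_first = first_stripe * chunk_sectors;
	*dev_last = last_stripe * chunk_sectors;
}

int main(void)
{
	sector_t first, last;

	/* 4 data disks, 512 KiB chunks (1024 sectors): one stripe = 4096
	 * sectors of data.  A discard of sectors [1000, 10000) only fully
	 * covers stripe 1 (data sectors 4096..8191), so that is all that
	 * gets trimmed. */
	discard_bounds(1000, 10000, 1024, 4, &first, &last);
	printf("per-device sectors [%llu, %llu)\n", first, last);
	return 0;
}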
5767 struct r5conf *conf = mddev->private; in raid5_make_request() local
5777 if (unlikely(bi->bi_opf & REQ_PREFLUSH)) { in raid5_make_request()
5778 int ret = log_handle_flush_request(conf, bi); in raid5_make_request()
5782 if (ret == -ENODEV) { in raid5_make_request()
5786 /* ret == -EAGAIN, fallback */ in raid5_make_request()
5791 do_flush = bi->bi_opf & REQ_PREFLUSH; in raid5_make_request()
5801 if (rw == READ && mddev->degraded == 0 && in raid5_make_request()
5802 mddev->reshape_position == MaxSector) { in raid5_make_request()
5814 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in raid5_make_request()
5816 bi->bi_next = NULL; in raid5_make_request()
5818 prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); in raid5_make_request()
5819 for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { in raid5_make_request()
5825 seq = read_seqcount_begin(&conf->gen_lock); in raid5_make_request()
5828 prepare_to_wait(&conf->wait_for_overlap, &w, in raid5_make_request()
5830 if (unlikely(conf->reshape_progress != MaxSector)) { in raid5_make_request()
5833 * possible to see a half-updated value in raid5_make_request()
5839 spin_lock_irq(&conf->device_lock); in raid5_make_request()
5840 if (mddev->reshape_backwards in raid5_make_request()
5841 ? logical_sector < conf->reshape_progress in raid5_make_request()
5842 : logical_sector >= conf->reshape_progress) { in raid5_make_request()
5845 if (mddev->reshape_backwards in raid5_make_request()
5846 ? logical_sector < conf->reshape_safe in raid5_make_request()
5847 : logical_sector >= conf->reshape_safe) { in raid5_make_request()
5848 spin_unlock_irq(&conf->device_lock); in raid5_make_request()
5854 spin_unlock_irq(&conf->device_lock); in raid5_make_request()
5857 new_sector = raid5_compute_sector(conf, logical_sector, in raid5_make_request()
5864 sh = raid5_get_active_stripe(conf, new_sector, previous, in raid5_make_request()
5865 (bi->bi_opf & REQ_RAHEAD), 0); in raid5_make_request()
5877 spin_lock_irq(&conf->device_lock); in raid5_make_request()
5878 if (mddev->reshape_backwards in raid5_make_request()
5879 ? logical_sector >= conf->reshape_progress in raid5_make_request()
5880 : logical_sector < conf->reshape_progress) in raid5_make_request()
5883 spin_unlock_irq(&conf->device_lock); in raid5_make_request()
5891 if (read_seqcount_retry(&conf->gen_lock, seq)) { in raid5_make_request()
5899 if (test_bit(STRIPE_EXPANDING, &sh->state) || in raid5_make_request()
5905 md_wakeup_thread(mddev->thread); in raid5_make_request()
5912 set_bit(STRIPE_R5C_PREFLUSH, &sh->state); in raid5_make_request()
5917 set_bit(STRIPE_HANDLE, &sh->state); in raid5_make_request()
5918 clear_bit(STRIPE_DELAYED, &sh->state); in raid5_make_request()
5919 if ((!sh->batch_head || sh == sh->batch_head) && in raid5_make_request()
5920 (bi->bi_opf & REQ_SYNC) && in raid5_make_request()
5921 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in raid5_make_request()
5922 atomic_inc(&conf->preread_active_stripes); in raid5_make_request()
5925 /* cannot get stripe for read-ahead, just give-up */ in raid5_make_request()
5926 bi->bi_status = BLK_STS_IOERR; in raid5_make_request()
5930 finish_wait(&conf->wait_for_overlap, &w); in raid5_make_request()
5951 struct r5conf *conf = mddev->private; in reshape_request() local
5955 int raid_disks = conf->previous_raid_disks; in reshape_request()
5956 int data_disks = raid_disks - conf->max_degraded; in reshape_request()
5957 int new_data_disks = conf->raid_disks - conf->max_degraded; in reshape_request()
5968 if (mddev->reshape_backwards && in reshape_request()
5969 conf->reshape_progress < raid5_size(mddev, 0, 0)) { in reshape_request()
5971 - conf->reshape_progress; in reshape_request()
5972 } else if (mddev->reshape_backwards && in reshape_request()
5973 conf->reshape_progress == MaxSector) { in reshape_request()
5976 } else if (!mddev->reshape_backwards && in reshape_request()
5977 conf->reshape_progress > 0) in reshape_request()
5978 sector_nr = conf->reshape_progress; in reshape_request()
5981 mddev->curr_resync_completed = sector_nr; in reshape_request()
5982 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
5994 reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors); in reshape_request()
5997 * the data about to be copied would over-write the source of in reshape_request()
6002 writepos = conf->reshape_progress; in reshape_request()
6004 readpos = conf->reshape_progress; in reshape_request()
6006 safepos = conf->reshape_safe; in reshape_request()
6008 if (mddev->reshape_backwards) { in reshape_request()
6010 writepos -= reshape_sectors; in reshape_request()
6015 /* readpos and safepos are worst-case calculations. in reshape_request()
6019 readpos -= min_t(sector_t, reshape_sectors, readpos); in reshape_request()
6020 safepos -= min_t(sector_t, reshape_sectors, safepos); in reshape_request()
6026 if (mddev->reshape_backwards) { in reshape_request()
6027 BUG_ON(conf->reshape_progress == 0); in reshape_request()
6029 BUG_ON((mddev->dev_sectors & in reshape_request()
6030 ~((sector_t)reshape_sectors - 1)) in reshape_request()
6031 - reshape_sectors - stripe_addr in reshape_request()
6046 * ensure safety in the face of a crash - that must be done by userspace in reshape_request()
6058 if (conf->min_offset_diff < 0) { in reshape_request()
6059 safepos += -conf->min_offset_diff; in reshape_request()
6060 readpos += -conf->min_offset_diff; in reshape_request()
6062 writepos += conf->min_offset_diff; in reshape_request()
6064 if ((mddev->reshape_backwards in reshape_request()
6067 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { in reshape_request()
6069 wait_event(conf->wait_for_overlap, in reshape_request()
6070 atomic_read(&conf->reshape_stripes)==0 in reshape_request()
6071 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6072 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6074 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6075 mddev->curr_resync_completed = sector_nr; in reshape_request()
6076 if (!mddev->reshape_backwards) in reshape_request()
6079 if (rdev->raid_disk >= 0 && in reshape_request()
6080 !test_bit(Journal, &rdev->flags) && in reshape_request()
6081 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6082 rdev->recovery_offset < sector_nr) in reshape_request()
6083 rdev->recovery_offset = sector_nr; in reshape_request()
6085 conf->reshape_checkpoint = jiffies; in reshape_request()
6086 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6087 md_wakeup_thread(mddev->thread); in reshape_request()
6088 wait_event(mddev->sb_wait, mddev->sb_flags == 0 || in reshape_request()
6089 test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6090 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6092 spin_lock_irq(&conf->device_lock); in reshape_request()
6093 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6094 spin_unlock_irq(&conf->device_lock); in reshape_request()
6095 wake_up(&conf->wait_for_overlap); in reshape_request()
6096 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
6100 for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) { in reshape_request()
6103 sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1); in reshape_request()
6104 set_bit(STRIPE_EXPANDING, &sh->state); in reshape_request()
6105 atomic_inc(&conf->reshape_stripes); in reshape_request()
6109 for (j=sh->disks; j--;) { in reshape_request()
6111 if (j == sh->pd_idx) in reshape_request()
6113 if (conf->level == 6 && in reshape_request()
6114 j == sh->qd_idx) in reshape_request()
6121 memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf)); in reshape_request()
6122 set_bit(R5_Expanded, &sh->dev[j].flags); in reshape_request()
6123 set_bit(R5_UPTODATE, &sh->dev[j].flags); in reshape_request()
6126 set_bit(STRIPE_EXPAND_READY, &sh->state); in reshape_request()
6127 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6129 list_add(&sh->lru, &stripes); in reshape_request()
6131 spin_lock_irq(&conf->device_lock); in reshape_request()
6132 if (mddev->reshape_backwards) in reshape_request()
6133 conf->reshape_progress -= reshape_sectors * new_data_disks; in reshape_request()
6135 conf->reshape_progress += reshape_sectors * new_data_disks; in reshape_request()
6136 spin_unlock_irq(&conf->device_lock); in reshape_request()
6143 raid5_compute_sector(conf, stripe_addr*(new_data_disks), in reshape_request()
6146 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) in reshape_request()
6147 * new_data_disks - 1), in reshape_request()
6149 if (last_sector >= mddev->dev_sectors) in reshape_request()
6150 last_sector = mddev->dev_sectors - 1; in reshape_request()
6152 sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1); in reshape_request()
6153 set_bit(STRIPE_EXPAND_SOURCE, &sh->state); in reshape_request()
6154 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6156 first_sector += RAID5_STRIPE_SECTORS(conf); in reshape_request()
6163 list_del_init(&sh->lru); in reshape_request()
6172 if (mddev->curr_resync_completed > mddev->resync_max || in reshape_request()
6173 (sector_nr - mddev->curr_resync_completed) * 2 in reshape_request()
6174 >= mddev->resync_max - mddev->curr_resync_completed) { in reshape_request()
6176 wait_event(conf->wait_for_overlap, in reshape_request()
6177 atomic_read(&conf->reshape_stripes) == 0 in reshape_request()
6178 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6179 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6181 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6182 mddev->curr_resync_completed = sector_nr; in reshape_request()
6183 if (!mddev->reshape_backwards) in reshape_request()
6186 if (rdev->raid_disk >= 0 && in reshape_request()
6187 !test_bit(Journal, &rdev->flags) && in reshape_request()
6188 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6189 rdev->recovery_offset < sector_nr) in reshape_request()
6190 rdev->recovery_offset = sector_nr; in reshape_request()
6191 conf->reshape_checkpoint = jiffies; in reshape_request()
6192 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6193 md_wakeup_thread(mddev->thread); in reshape_request()
6194 wait_event(mddev->sb_wait, in reshape_request()
6195 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) in reshape_request()
6196 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6197 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6199 spin_lock_irq(&conf->device_lock); in reshape_request()
6200 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6201 spin_unlock_irq(&conf->device_lock); in reshape_request()
6202 wake_up(&conf->wait_for_overlap); in reshape_request()
6203 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
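reshape_request() tracks three cursors: writepos (the furthest point about to be written in the new layout), readpos (the earliest point still to be read from the old layout) and safepos (the position last recorded as safe in the metadata). For a forward reshape it forces a checkpoint when safepos < writepos while readpos > writepos, so a crash cannot leave old data overwritten while the recorded progress still points at the old layout; it also checkpoints at least every ten seconds. A sketch of that window test, ignoring the min_offset_diff adjustment:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

/* Forward (growing) reshape: before copying the next block, a checkpoint
 * must be recorded if the area about to be written is ahead of safepos
 * while the source data is still being read from beyond it.  This mirrors
 * the !reshape_backwards arm of the test in reshape_request(); the
 * min_offset_diff adjustment is left out. */
static bool need_checkpoint(sector_t writepos, sector_t readpos,
			    sector_t safepos)
{
	return readpos > writepos && safepos < writepos;
}

int main(void)
{
	/* Reading old data at 8192, writing new layout at 4096, but the last
	 * checkpoint is only at 2048: must flush a checkpoint first. */
	printf("%d\n", need_checkpoint(4096, 8192, 2048));
	/* Checkpoint already past the write position: safe to continue. */
	printf("%d\n", need_checkpoint(4096, 8192, 6144));
	return 0;
}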
6212 struct r5conf *conf = mddev->private; in raid5_sync_request() local
6214 sector_t max_sector = mddev->dev_sectors; in raid5_sync_request()
6222 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { in raid5_sync_request()
6223 end_reshape(conf); in raid5_sync_request()
6227 if (mddev->curr_resync < max_sector) /* aborted */ in raid5_sync_request()
6228 md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, in raid5_sync_request()
6231 conf->fullsync = 0; in raid5_sync_request()
6232 md_bitmap_close_sync(mddev->bitmap); in raid5_sync_request()
6238 wait_event(conf->wait_for_overlap, conf->quiesce != 2); in raid5_sync_request()
6240 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) in raid5_sync_request()
6253 if (mddev->degraded >= conf->max_degraded && in raid5_sync_request()
6254 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { in raid5_sync_request()
6255 sector_t rv = mddev->dev_sectors - sector_nr; in raid5_sync_request()
6259 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && in raid5_sync_request()
6260 !conf->fullsync && in raid5_sync_request()
6261 !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && in raid5_sync_request()
6262 sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { in raid5_sync_request()
6264 do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf)); in raid5_sync_request()
6267 return sync_blocks * RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
6270 md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); in raid5_sync_request()
6272 sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0); in raid5_sync_request()
6274 sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0); in raid5_sync_request()
6285 for (i = 0; i < conf->raid_disks; i++) { in raid5_sync_request()
6286 struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev); in raid5_sync_request()
6288 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) in raid5_sync_request()
6293 md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); in raid5_sync_request()
6295 set_bit(STRIPE_SYNC_REQUESTED, &sh->state); in raid5_sync_request()
6296 set_bit(STRIPE_HANDLE, &sh->state); in raid5_sync_request()
6300 return RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
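When no full resync was requested and the write-intent bitmap reports at least a stripe's worth of clean sectors, raid5_sync_request() skips them wholesale, rounding the skip down to whole stripes. A sketch of that rounding (8 sectors corresponds to the default 4 KiB stripe size):

#include <stdio.h>

typedef unsigned long long sector_t;

/* If the bitmap reports 'sync_blocks' clean sectors ahead of us, skip them
 * in whole-stripe units (RAID5_STRIPE_SECTORS in the kernel). */
static sector_t skippable_sectors(sector_t sync_blocks,
				  unsigned int stripe_sectors)
{
	sync_blocks /= stripe_sectors;		/* whole stripes only */
	return sync_blocks * stripe_sectors;
}

int main(void)
{
	/* Bitmap says 1003 sectors are clean; with 8-sector stripes we can
	 * skip 1000 of them and resume syncing at the next stripe. */
	printf("%llu\n", skippable_sectors(1003, 8));
	return 0;
}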
6303 static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, in retry_aligned_read() argument
6308 * We cannot pre-allocate enough stripe_heads as we may need in retry_aligned_read()
6311 * ->bi_hw_segments how many have been done. in retry_aligned_read()
6322 logical_sector = raid_bio->bi_iter.bi_sector & in retry_aligned_read()
6323 ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in retry_aligned_read()
6324 sector = raid5_compute_sector(conf, logical_sector, in retry_aligned_read()
6329 logical_sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6330 sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6337 sh = raid5_get_active_stripe(conf, sector, 0, 1, 1); in retry_aligned_read()
6340 /* failed to get a stripe - must wait */ in retry_aligned_read()
6341 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6342 conf->retry_read_offset = scnt; in retry_aligned_read()
6348 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6349 conf->retry_read_offset = scnt; in retry_aligned_read()
6353 set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); in retry_aligned_read()
6361 if (atomic_dec_and_test(&conf->active_aligned_reads)) in retry_aligned_read()
6362 wake_up(&conf->wait_for_quiescent); in retry_aligned_read()
6366 static int handle_active_stripes(struct r5conf *conf, int group, in handle_active_stripes() argument
6369 __releases(&conf->device_lock) in handle_active_stripes()
6370 __acquires(&conf->device_lock) in handle_active_stripes()
6377 (sh = __get_priority_stripe(conf, group)) != NULL) in handle_active_stripes()
6385 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6386 log_flush_stripe_to_raid(conf); in handle_active_stripes()
6387 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6392 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6394 release_inactive_stripe_list(conf, temp_inactive_list, in handle_active_stripes()
6397 r5l_flush_stripe_to_raid(conf->log); in handle_active_stripes()
6399 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6405 log_write_stripe_run(conf); in handle_active_stripes()
6409 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6411 hash = batch[i]->hash_lock_index; in handle_active_stripes()
6412 __release_stripe(conf, batch[i], &temp_inactive_list[hash]); in handle_active_stripes()
6420 struct r5worker_group *group = worker->group; in raid5_do_work()
6421 struct r5conf *conf = group->conf; in raid5_do_work() local
6422 struct mddev *mddev = conf->mddev; in raid5_do_work()
6423 int group_id = group - conf->worker_groups; in raid5_do_work()
6431 spin_lock_irq(&conf->device_lock); in raid5_do_work()
6435 released = release_stripe_list(conf, worker->temp_inactive_list); in raid5_do_work()
6437 batch_size = handle_active_stripes(conf, group_id, worker, in raid5_do_work()
6438 worker->temp_inactive_list); in raid5_do_work()
6439 worker->working = false; in raid5_do_work()
6443 wait_event_lock_irq(mddev->sb_wait, in raid5_do_work()
6444 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), in raid5_do_work()
6445 conf->device_lock); in raid5_do_work()
6449 spin_unlock_irq(&conf->device_lock); in raid5_do_work()
6451 flush_deferred_bios(conf); in raid5_do_work()
6453 r5l_flush_stripe_to_raid(conf->log); in raid5_do_work()
6458 pr_debug("--- raid5worker inactive\n"); in raid5_do_work()
6470 struct mddev *mddev = thread->mddev; in raid5d()
6471 struct r5conf *conf = mddev->private; in raid5d() local
6481 spin_lock_irq(&conf->device_lock); in raid5d()
6487 released = release_stripe_list(conf, conf->temp_inactive_list); in raid5d()
6489 clear_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6492 !list_empty(&conf->bitmap_list)) { in raid5d()
6494 conf->seq_flush++; in raid5d()
6495 spin_unlock_irq(&conf->device_lock); in raid5d()
6496 md_bitmap_unplug(mddev->bitmap); in raid5d()
6497 spin_lock_irq(&conf->device_lock); in raid5d()
6498 conf->seq_write = conf->seq_flush; in raid5d()
6499 activate_bit_delay(conf, conf->temp_inactive_list); in raid5d()
6501 raid5_activate_delayed(conf); in raid5d()
6503 while ((bio = remove_bio_from_retry(conf, &offset))) { in raid5d()
6505 spin_unlock_irq(&conf->device_lock); in raid5d()
6506 ok = retry_aligned_read(conf, bio, offset); in raid5d()
6507 spin_lock_irq(&conf->device_lock); in raid5d()
6513 batch_size = handle_active_stripes(conf, ANY_GROUP, NULL, in raid5d()
6514 conf->temp_inactive_list); in raid5d()
6519 if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) { in raid5d()
6520 spin_unlock_irq(&conf->device_lock); in raid5d()
6522 spin_lock_irq(&conf->device_lock); in raid5d()
6532 wait_event_lock_irq(mddev->sb_wait, in raid5d()
6533 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), in raid5d()
6534 conf->device_lock); in raid5d()
6538 spin_unlock_irq(&conf->device_lock); in raid5d()
6539 if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && in raid5d()
6540 mutex_trylock(&conf->cache_size_mutex)) { in raid5d()
6541 grow_one_stripe(conf, __GFP_NOWARN); in raid5d()
6545 set_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6546 mutex_unlock(&conf->cache_size_mutex); in raid5d()
6549 flush_deferred_bios(conf); in raid5d()
6551 r5l_flush_stripe_to_raid(conf->log); in raid5d()
6556 pr_debug("--- raid5d inactive\n"); in raid5d()
6562 struct r5conf *conf; in raid5_show_stripe_cache_size() local
6564 spin_lock(&mddev->lock); in raid5_show_stripe_cache_size()
6565 conf = mddev->private; in raid5_show_stripe_cache_size()
6566 if (conf) in raid5_show_stripe_cache_size()
6567 ret = sprintf(page, "%d\n", conf->min_nr_stripes); in raid5_show_stripe_cache_size()
6568 spin_unlock(&mddev->lock); in raid5_show_stripe_cache_size()
6576 struct r5conf *conf = mddev->private; in raid5_set_cache_size() local
6579 return -EINVAL; in raid5_set_cache_size()
6581 conf->min_nr_stripes = size; in raid5_set_cache_size()
6582 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6583 while (size < conf->max_nr_stripes && in raid5_set_cache_size()
6584 drop_one_stripe(conf)) in raid5_set_cache_size()
6586 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
6590 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6591 while (size > conf->max_nr_stripes) in raid5_set_cache_size()
6592 if (!grow_one_stripe(conf, GFP_KERNEL)) { in raid5_set_cache_size()
6593 conf->min_nr_stripes = conf->max_nr_stripes; in raid5_set_cache_size()
6594 result = -ENOMEM; in raid5_set_cache_size()
6597 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
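raid5_set_cache_size() resizes the stripe cache in two passes: with min_nr_stripes lowered it drops stripes while the cache is larger than the target, then grows one stripe at a time up to the target, and on allocation failure clamps min_nr_stripes to whatever was reached and returns -ENOMEM. A toy model of that shrink-then-grow loop; grow_one()/drop_one() and the global counter stand in for grow_one_stripe()/drop_one_stripe() and conf->max_nr_stripes:

#include <stdbool.h>
#include <stdio.h>

/* Toy stripe cache: 'nr' plays the role of conf->max_nr_stripes. */
static int nr;

static bool grow_one(void) { nr++; return true; }	/* pretend allocation succeeds */
static bool drop_one(void) { if (!nr) return false; nr--; return true; }

/* Two-phase resize as in raid5_set_cache_size(): shrink while the cache is
 * larger than the target, then grow until it reaches the target. */
static int set_cache_size(int size)
{
	if (size <= 16 || size > 32768)
		return -1;		/* out-of-range sizes are rejected */
	while (size < nr && drop_one())
		;
	while (size > nr)
		if (!grow_one())
			return -1;	/* -ENOMEM in the kernel */
	return 0;
}

int main(void)
{
	nr = 256;			/* initial cache size for the example */
	set_cache_size(1024);
	printf("cache now holds %d stripes\n", nr);
	set_cache_size(128);
	printf("cache now holds %d stripes\n", nr);
	return 0;
}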
6606 struct r5conf *conf; in raid5_store_stripe_cache_size() local
6611 return -EINVAL; in raid5_store_stripe_cache_size()
6613 return -EINVAL; in raid5_store_stripe_cache_size()
6617 conf = mddev->private; in raid5_store_stripe_cache_size()
6618 if (!conf) in raid5_store_stripe_cache_size()
6619 err = -ENODEV; in raid5_store_stripe_cache_size()
6635 struct r5conf *conf = mddev->private; in raid5_show_rmw_level() local
6636 if (conf) in raid5_show_rmw_level()
6637 return sprintf(page, "%d\n", conf->rmw_level); in raid5_show_rmw_level()
6645 struct r5conf *conf = mddev->private; in raid5_store_rmw_level() local
6648 if (!conf) in raid5_store_rmw_level()
6649 return -ENODEV; in raid5_store_rmw_level()
6652 return -EINVAL; in raid5_store_rmw_level()
6655 return -EINVAL; in raid5_store_rmw_level()
6658 return -EINVAL; in raid5_store_rmw_level()
6663 return -EINVAL; in raid5_store_rmw_level()
6665 conf->rmw_level = new; in raid5_store_rmw_level()
6677 struct r5conf *conf; in raid5_show_stripe_size() local
6680 spin_lock(&mddev->lock); in raid5_show_stripe_size()
6681 conf = mddev->private; in raid5_show_stripe_size()
6682 if (conf) in raid5_show_stripe_size()
6683 ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf)); in raid5_show_stripe_size()
6684 spin_unlock(&mddev->lock); in raid5_show_stripe_size()
6692 struct r5conf *conf; in raid5_store_stripe_size() local
6698 return -EINVAL; in raid5_store_stripe_size()
6700 return -EINVAL; in raid5_store_stripe_size()
6710 return -EINVAL; in raid5_store_stripe_size()
6716 conf = mddev->private; in raid5_store_stripe_size()
6717 if (!conf) { in raid5_store_stripe_size()
6718 err = -ENODEV; in raid5_store_stripe_size()
6722 if (new == conf->stripe_size) in raid5_store_stripe_size()
6726 conf->stripe_size, new); in raid5_store_stripe_size()
6728 if (mddev->sync_thread || in raid5_store_stripe_size()
6729 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || in raid5_store_stripe_size()
6730 mddev->reshape_position != MaxSector || in raid5_store_stripe_size()
6731 mddev->sysfs_active) { in raid5_store_stripe_size()
6732 err = -EBUSY; in raid5_store_stripe_size()
6737 mutex_lock(&conf->cache_size_mutex); in raid5_store_stripe_size()
6738 size = conf->max_nr_stripes; in raid5_store_stripe_size()
6740 shrink_stripes(conf); in raid5_store_stripe_size()
6742 conf->stripe_size = new; in raid5_store_stripe_size()
6743 conf->stripe_shift = ilog2(new) - 9; in raid5_store_stripe_size()
6744 conf->stripe_sectors = new >> 9; in raid5_store_stripe_size()
6745 if (grow_stripes(conf, size)) { in raid5_store_stripe_size()
6748 err = -ENOMEM; in raid5_store_stripe_size()
6750 mutex_unlock(&conf->cache_size_mutex); in raid5_store_stripe_size()
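/*
 * Changing stripe_size means every cached stripe_head must be rebuilt
 * with pages of the new size: the old cache is torn down with
 * shrink_stripes(), stripe_size/stripe_shift/stripe_sectors are updated,
 * and grow_stripes() repopulates the cache to its previous count.  That
 * is why the store handler refuses (-EBUSY above) while a sync thread,
 * a reshape, or sysfs activity is in flight.
 */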
6772 struct r5conf *conf; in raid5_show_preread_threshold() local
6774 spin_lock(&mddev->lock); in raid5_show_preread_threshold()
6775 conf = mddev->private; in raid5_show_preread_threshold()
6776 if (conf) in raid5_show_preread_threshold()
6777 ret = sprintf(page, "%d\n", conf->bypass_threshold); in raid5_show_preread_threshold()
6778 spin_unlock(&mddev->lock); in raid5_show_preread_threshold()
6785 struct r5conf *conf; in raid5_store_preread_threshold() local
6790 return -EINVAL; in raid5_store_preread_threshold()
6792 return -EINVAL; in raid5_store_preread_threshold()
6797 conf = mddev->private; in raid5_store_preread_threshold()
6798 if (!conf) in raid5_store_preread_threshold()
6799 err = -ENODEV; in raid5_store_preread_threshold()
6800 else if (new > conf->min_nr_stripes) in raid5_store_preread_threshold()
6801 err = -EINVAL; in raid5_store_preread_threshold()
6803 conf->bypass_threshold = new; in raid5_store_preread_threshold()
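/*
 * bypass_threshold (the preread_bypass_threshold attribute) roughly
 * bounds how many times full-stripe writes may bypass stripes that are
 * waiting for pre-reads; it may not exceed min_nr_stripes, which the
 * check above enforces.
 */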
6817 struct r5conf *conf; in raid5_show_skip_copy() local
6819 spin_lock(&mddev->lock); in raid5_show_skip_copy()
6820 conf = mddev->private; in raid5_show_skip_copy()
6821 if (conf) in raid5_show_skip_copy()
6822 ret = sprintf(page, "%d\n", conf->skip_copy); in raid5_show_skip_copy()
6823 spin_unlock(&mddev->lock); in raid5_show_skip_copy()
6830 struct r5conf *conf; in raid5_store_skip_copy() local
6835 return -EINVAL; in raid5_store_skip_copy()
6837 return -EINVAL; in raid5_store_skip_copy()
6843 conf = mddev->private; in raid5_store_skip_copy()
6844 if (!conf) in raid5_store_skip_copy()
6845 err = -ENODEV; in raid5_store_skip_copy()
6846 else if (new != conf->skip_copy) { in raid5_store_skip_copy()
6847 struct request_queue *q = mddev->queue; in raid5_store_skip_copy()
6850 conf->skip_copy = new; in raid5_store_skip_copy()
6869 struct r5conf *conf = mddev->private; in stripe_cache_active_show() local
6870 if (conf) in stripe_cache_active_show()
6871 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); in stripe_cache_active_show()
6882 struct r5conf *conf; in raid5_show_group_thread_cnt() local
6884 spin_lock(&mddev->lock); in raid5_show_group_thread_cnt()
6885 conf = mddev->private; in raid5_show_group_thread_cnt()
6886 if (conf) in raid5_show_group_thread_cnt()
6887 ret = sprintf(page, "%d\n", conf->worker_cnt_per_group); in raid5_show_group_thread_cnt()
6888 spin_unlock(&mddev->lock); in raid5_show_group_thread_cnt()
6892 static int alloc_thread_groups(struct r5conf *conf, int cnt,
6898 struct r5conf *conf; in raid5_store_group_thread_cnt() local
6905 return -EINVAL; in raid5_store_group_thread_cnt()
6907 return -EINVAL; in raid5_store_group_thread_cnt()
6910 return -EINVAL; in raid5_store_group_thread_cnt()
6915 conf = mddev->private; in raid5_store_group_thread_cnt()
6916 if (!conf) in raid5_store_group_thread_cnt()
6917 err = -ENODEV; in raid5_store_group_thread_cnt()
6918 else if (new != conf->worker_cnt_per_group) { in raid5_store_group_thread_cnt()
6921 old_groups = conf->worker_groups; in raid5_store_group_thread_cnt()
6925 err = alloc_thread_groups(conf, new, &group_cnt, &new_groups); in raid5_store_group_thread_cnt()
6927 spin_lock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
6928 conf->group_cnt = group_cnt; in raid5_store_group_thread_cnt()
6929 conf->worker_cnt_per_group = new; in raid5_store_group_thread_cnt()
6930 conf->worker_groups = new_groups; in raid5_store_group_thread_cnt()
6931 spin_unlock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
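/*
 * group_thread_cnt rebuilds the per-NUMA-node worker groups: the new
 * groups are allocated first, swapped in under device_lock, and only
 * afterwards are the old groups freed, so raid5d and the workers never
 * observe a half-initialised array.
 */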
6966 static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt, in alloc_thread_groups() argument
6986 return -ENOMEM; in alloc_thread_groups()
6993 INIT_LIST_HEAD(&group->handle_list); in alloc_thread_groups()
6994 INIT_LIST_HEAD(&group->loprio_list); in alloc_thread_groups()
6995 group->conf = conf; in alloc_thread_groups()
6996 group->workers = workers + i * cnt; in alloc_thread_groups()
6999 struct r5worker *worker = group->workers + j; in alloc_thread_groups()
7000 worker->group = group; in alloc_thread_groups()
7001 INIT_WORK(&worker->work, raid5_do_work); in alloc_thread_groups()
7004 INIT_LIST_HEAD(worker->temp_inactive_list + k); in alloc_thread_groups()
7011 static void free_thread_groups(struct r5conf *conf) in free_thread_groups() argument
7013 if (conf->worker_groups) in free_thread_groups()
7014 kfree(conf->worker_groups[0].workers); in free_thread_groups()
7015 kfree(conf->worker_groups); in free_thread_groups()
7016 conf->worker_groups = NULL; in free_thread_groups()
7022 struct r5conf *conf = mddev->private; in raid5_size() local
7025 sectors = mddev->dev_sectors; in raid5_size()
7028 raid_disks = min(conf->raid_disks, conf->previous_raid_disks); in raid5_size()
7030 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_size()
7031 sectors &= ~((sector_t)conf->prev_chunk_sectors - 1); in raid5_size()
7032 return sectors * (raid_disks - conf->max_degraded); in raid5_size()
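/*
 * Usable capacity, as computed above, is the per-device sector count
 * rounded down to a whole chunk (both the old and new chunk size during
 * a reshape) multiplied by the number of data disks.  Illustrative
 * numbers only: a 4-device RAID5 (max_degraded == 1) of 1 TiB members
 * yields roughly 3 TiB, a 6-device RAID6 (max_degraded == 2) of the same
 * members roughly 4 TiB.
 */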
7035 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in free_scratch_buffer() argument
7037 safe_put_page(percpu->spare_page); in free_scratch_buffer()
7038 percpu->spare_page = NULL; in free_scratch_buffer()
7039 kvfree(percpu->scribble); in free_scratch_buffer()
7040 percpu->scribble = NULL; in free_scratch_buffer()
7043 static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in alloc_scratch_buffer() argument
7045 if (conf->level == 6 && !percpu->spare_page) { in alloc_scratch_buffer()
7046 percpu->spare_page = alloc_page(GFP_KERNEL); in alloc_scratch_buffer()
7047 if (!percpu->spare_page) in alloc_scratch_buffer()
7048 return -ENOMEM; in alloc_scratch_buffer()
7052 max(conf->raid_disks, in alloc_scratch_buffer()
7053 conf->previous_raid_disks), in alloc_scratch_buffer()
7054 max(conf->chunk_sectors, in alloc_scratch_buffer()
7055 conf->prev_chunk_sectors) in alloc_scratch_buffer()
7056 / RAID5_STRIPE_SECTORS(conf))) { in alloc_scratch_buffer()
7057 free_scratch_buffer(conf, percpu); in alloc_scratch_buffer()
7058 return -ENOMEM; in alloc_scratch_buffer()
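/*
 * Per-CPU scratch state: RAID6 keeps a spare page for parity checks, and
 * every level keeps a scribble buffer sized for the source/destination
 * pointer lists that the async XOR/syndrome routines need - hence the
 * max() over old and new disk counts and chunk sizes, so a reshape never
 * outgrows it.
 */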
7066 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_dead() local
7068 free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); in raid456_cpu_dead()
7072 static void raid5_free_percpu(struct r5conf *conf) in raid5_free_percpu() argument
7074 if (!conf->percpu) in raid5_free_percpu()
7077 cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_free_percpu()
7078 free_percpu(conf->percpu); in raid5_free_percpu()
7081 static void free_conf(struct r5conf *conf) in free_conf() argument
7085 log_exit(conf); in free_conf()
7087 unregister_shrinker(&conf->shrinker); in free_conf()
7088 free_thread_groups(conf); in free_conf()
7089 shrink_stripes(conf); in free_conf()
7090 raid5_free_percpu(conf); in free_conf()
7091 for (i = 0; i < conf->pool_size; i++) in free_conf()
7092 if (conf->disks[i].extra_page) in free_conf()
7093 put_page(conf->disks[i].extra_page); in free_conf()
7094 kfree(conf->disks); in free_conf()
7095 bioset_exit(&conf->bio_split); in free_conf()
7096 kfree(conf->stripe_hashtbl); in free_conf()
7097 kfree(conf->pending_data); in free_conf()
7098 kfree(conf); in free_conf()
7103 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_up_prepare() local
7104 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); in raid456_cpu_up_prepare()
7106 if (alloc_scratch_buffer(conf, percpu)) { in raid456_cpu_up_prepare()
7109 return -ENOMEM; in raid456_cpu_up_prepare()
7114 static int raid5_alloc_percpu(struct r5conf *conf) in raid5_alloc_percpu() argument
7118 conf->percpu = alloc_percpu(struct raid5_percpu); in raid5_alloc_percpu()
7119 if (!conf->percpu) in raid5_alloc_percpu()
7120 return -ENOMEM; in raid5_alloc_percpu()
7122 err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_alloc_percpu()
7124 conf->scribble_disks = max(conf->raid_disks, in raid5_alloc_percpu()
7125 conf->previous_raid_disks); in raid5_alloc_percpu()
7126 conf->scribble_sectors = max(conf->chunk_sectors, in raid5_alloc_percpu()
7127 conf->prev_chunk_sectors); in raid5_alloc_percpu()
7135 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_scan() local
7138 if (mutex_trylock(&conf->cache_size_mutex)) { in raid5_cache_scan()
7140 while (ret < sc->nr_to_scan && in raid5_cache_scan()
7141 conf->max_nr_stripes > conf->min_nr_stripes) { in raid5_cache_scan()
7142 if (drop_one_stripe(conf) == 0) { in raid5_cache_scan()
7148 mutex_unlock(&conf->cache_size_mutex); in raid5_cache_scan()
7156 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_count() local
7158 if (conf->max_nr_stripes < conf->min_nr_stripes) in raid5_cache_count()
7161 return conf->max_nr_stripes - conf->min_nr_stripes; in raid5_cache_count()
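/*
 * Shrinker integration: count_objects reports how far the cache has
 * grown beyond min_nr_stripes, and scan_objects (above) drops stripes
 * one at a time, but only if it can take cache_size_mutex without
 * blocking, so memory pressure never deadlocks against an explicit
 * cache resize.
 */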
7166 struct r5conf *conf; in setup_conf() local
7176 if (mddev->new_level != 5 in setup_conf()
7177 && mddev->new_level != 4 in setup_conf()
7178 && mddev->new_level != 6) { in setup_conf()
7180 mdname(mddev), mddev->new_level); in setup_conf()
7181 return ERR_PTR(-EIO); in setup_conf()
7183 if ((mddev->new_level == 5 in setup_conf()
7184 && !algorithm_valid_raid5(mddev->new_layout)) || in setup_conf()
7185 (mddev->new_level == 6 in setup_conf()
7186 && !algorithm_valid_raid6(mddev->new_layout))) { in setup_conf()
7188 mdname(mddev), mddev->new_layout); in setup_conf()
7189 return ERR_PTR(-EIO); in setup_conf()
7191 if (mddev->new_level == 6 && mddev->raid_disks < 4) { in setup_conf()
7193 mdname(mddev), mddev->raid_disks); in setup_conf()
7194 return ERR_PTR(-EINVAL); in setup_conf()
7197 if (!mddev->new_chunk_sectors || in setup_conf()
7198 (mddev->new_chunk_sectors << 9) % PAGE_SIZE || in setup_conf()
7199 !is_power_of_2(mddev->new_chunk_sectors)) { in setup_conf()
7201 mdname(mddev), mddev->new_chunk_sectors << 9); in setup_conf()
7202 return ERR_PTR(-EINVAL); in setup_conf()
7205 conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); in setup_conf()
7206 if (conf == NULL) in setup_conf()
7210 conf->stripe_size = DEFAULT_STRIPE_SIZE; in setup_conf()
7211 conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9; in setup_conf()
7212 conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9; in setup_conf()
7214 INIT_LIST_HEAD(&conf->free_list); in setup_conf()
7215 INIT_LIST_HEAD(&conf->pending_list); in setup_conf()
7216 conf->pending_data = kcalloc(PENDING_IO_MAX, in setup_conf()
7219 if (!conf->pending_data) in setup_conf()
7222 list_add(&conf->pending_data[i].sibling, &conf->free_list); in setup_conf()
7223 /* Don't enable multi-threading by default */ in setup_conf()
7224 if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) { in setup_conf()
7225 conf->group_cnt = group_cnt; in setup_conf()
7226 conf->worker_cnt_per_group = 0; in setup_conf()
7227 conf->worker_groups = new_group; in setup_conf()
7230 spin_lock_init(&conf->device_lock); in setup_conf()
7231 seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock); in setup_conf()
7232 mutex_init(&conf->cache_size_mutex); in setup_conf()
7233 init_waitqueue_head(&conf->wait_for_quiescent); in setup_conf()
7234 init_waitqueue_head(&conf->wait_for_stripe); in setup_conf()
7235 init_waitqueue_head(&conf->wait_for_overlap); in setup_conf()
7236 INIT_LIST_HEAD(&conf->handle_list); in setup_conf()
7237 INIT_LIST_HEAD(&conf->loprio_list); in setup_conf()
7238 INIT_LIST_HEAD(&conf->hold_list); in setup_conf()
7239 INIT_LIST_HEAD(&conf->delayed_list); in setup_conf()
7240 INIT_LIST_HEAD(&conf->bitmap_list); in setup_conf()
7241 init_llist_head(&conf->released_stripes); in setup_conf()
7242 atomic_set(&conf->active_stripes, 0); in setup_conf()
7243 atomic_set(&conf->preread_active_stripes, 0); in setup_conf()
7244 atomic_set(&conf->active_aligned_reads, 0); in setup_conf()
7245 spin_lock_init(&conf->pending_bios_lock); in setup_conf()
7246 conf->batch_bio_dispatch = true; in setup_conf()
7248 if (test_bit(Journal, &rdev->flags)) in setup_conf()
7250 if (blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { in setup_conf()
7251 conf->batch_bio_dispatch = false; in setup_conf()
7256 conf->bypass_threshold = BYPASS_THRESHOLD; in setup_conf()
7257 conf->recovery_disabled = mddev->recovery_disabled - 1; in setup_conf()
7259 conf->raid_disks = mddev->raid_disks; in setup_conf()
7260 if (mddev->reshape_position == MaxSector) in setup_conf()
7261 conf->previous_raid_disks = mddev->raid_disks; in setup_conf()
7263 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; in setup_conf()
7264 max_disks = max(conf->raid_disks, conf->previous_raid_disks); in setup_conf()
7266 conf->disks = kcalloc(max_disks, sizeof(struct disk_info), in setup_conf()
7269 if (!conf->disks) in setup_conf()
7273 conf->disks[i].extra_page = alloc_page(GFP_KERNEL); in setup_conf()
7274 if (!conf->disks[i].extra_page) in setup_conf()
7278 ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); in setup_conf()
7281 conf->mddev = mddev; in setup_conf()
7283 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) in setup_conf()
7291 spin_lock_init(conf->hash_locks); in setup_conf()
7293 spin_lock_init(conf->hash_locks + i); in setup_conf()
7296 INIT_LIST_HEAD(conf->inactive_list + i); in setup_conf()
7299 INIT_LIST_HEAD(conf->temp_inactive_list + i); in setup_conf()
7301 atomic_set(&conf->r5c_cached_full_stripes, 0); in setup_conf()
7302 INIT_LIST_HEAD(&conf->r5c_full_stripe_list); in setup_conf()
7303 atomic_set(&conf->r5c_cached_partial_stripes, 0); in setup_conf()
7304 INIT_LIST_HEAD(&conf->r5c_partial_stripe_list); in setup_conf()
7305 atomic_set(&conf->r5c_flushing_full_stripes, 0); in setup_conf()
7306 atomic_set(&conf->r5c_flushing_partial_stripes, 0); in setup_conf()
7308 conf->level = mddev->new_level; in setup_conf()
7309 conf->chunk_sectors = mddev->new_chunk_sectors; in setup_conf()
7310 if (raid5_alloc_percpu(conf) != 0) in setup_conf()
7316 raid_disk = rdev->raid_disk; in setup_conf()
7318 || raid_disk < 0 || test_bit(Journal, &rdev->flags)) in setup_conf()
7320 disk = conf->disks + raid_disk; in setup_conf()
7322 if (test_bit(Replacement, &rdev->flags)) { in setup_conf()
7323 if (disk->replacement) in setup_conf()
7325 disk->replacement = rdev; in setup_conf()
7327 if (disk->rdev) in setup_conf()
7329 disk->rdev = rdev; in setup_conf()
7332 if (test_bit(In_sync, &rdev->flags)) { in setup_conf()
7335 mdname(mddev), bdevname(rdev->bdev, b), raid_disk); in setup_conf()
7336 } else if (rdev->saved_raid_disk != raid_disk) in setup_conf()
7338 conf->fullsync = 1; in setup_conf()
7341 conf->level = mddev->new_level; in setup_conf()
7342 if (conf->level == 6) { in setup_conf()
7343 conf->max_degraded = 2; in setup_conf()
7345 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7347 conf->rmw_level = PARITY_DISABLE_RMW; in setup_conf()
7349 conf->max_degraded = 1; in setup_conf()
7350 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7352 conf->algorithm = mddev->new_layout; in setup_conf()
7353 conf->reshape_progress = mddev->reshape_position; in setup_conf()
7354 if (conf->reshape_progress != MaxSector) { in setup_conf()
7355 conf->prev_chunk_sectors = mddev->chunk_sectors; in setup_conf()
7356 conf->prev_algo = mddev->layout; in setup_conf()
7358 conf->prev_chunk_sectors = conf->chunk_sectors; in setup_conf()
7359 conf->prev_algo = conf->algorithm; in setup_conf()
7362 conf->min_nr_stripes = NR_STRIPES; in setup_conf()
7363 if (mddev->reshape_position != MaxSector) { in setup_conf()
7365 ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4, in setup_conf()
7366 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4); in setup_conf()
7367 conf->min_nr_stripes = max(NR_STRIPES, stripes); in setup_conf()
7368 if (conf->min_nr_stripes != NR_STRIPES) in setup_conf()
7370 mdname(mddev), conf->min_nr_stripes); in setup_conf()
7372 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + in setup_conf()
7374 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); in setup_conf()
7375 if (grow_stripes(conf, conf->min_nr_stripes)) { in setup_conf()
7386 conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; in setup_conf()
7387 conf->shrinker.scan_objects = raid5_cache_scan; in setup_conf()
7388 conf->shrinker.count_objects = raid5_cache_count; in setup_conf()
7389 conf->shrinker.batch = 128; in setup_conf()
7390 conf->shrinker.flags = 0; in setup_conf()
7391 if (register_shrinker(&conf->shrinker)) { in setup_conf()
7397 sprintf(pers_name, "raid%d", mddev->new_level); in setup_conf()
7398 conf->thread = md_register_thread(raid5d, mddev, pers_name); in setup_conf()
7399 if (!conf->thread) { in setup_conf()
7405 return conf; in setup_conf()
7408 if (conf) { in setup_conf()
7409 free_conf(conf); in setup_conf()
7410 return ERR_PTR(-EIO); in setup_conf()
7412 return ERR_PTR(-ENOMEM); in setup_conf()
7423 if (raid_disk >= raid_disks - max_degraded) in only_parity()
7428 raid_disk == raid_disks - 1) in only_parity()
7435 if (raid_disk == raid_disks - 1) in only_parity()
7441 static void raid5_set_io_opt(struct r5conf *conf) in raid5_set_io_opt() argument
7443 blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * in raid5_set_io_opt()
7444 (conf->raid_disks - conf->max_degraded)); in raid5_set_io_opt()
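/*
 * io_opt advertises one full stripe of data to the block layer: chunk
 * size times the number of data disks.  Illustrative example: a 512 KiB
 * chunk on a 4-device RAID5 (3 data disks) gives an optimal I/O size of
 * 1.5 MiB.
 */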
7449 struct r5conf *conf; in raid5_run() local
7460 return -ENOMEM; in raid5_run()
7462 if (mddev->recovery_cp != MaxSector) in raid5_run()
7463 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", in raid5_run()
7469 if (test_bit(Journal, &rdev->flags)) { in raid5_run()
7473 if (rdev->raid_disk < 0) in raid5_run()
7475 diff = (rdev->new_data_offset - rdev->data_offset); in raid5_run()
7479 } else if (mddev->reshape_backwards && in raid5_run()
7482 else if (!mddev->reshape_backwards && in raid5_run()
7487 if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) && in raid5_run()
7488 (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { in raid5_run()
7491 return -EINVAL; in raid5_run()
7494 if (mddev->reshape_position != MaxSector) { in raid5_run()
7504 * critical areas read-only and backed up. It will start in raid5_run()
7505 * the array in read-only mode, so we check for that. in raid5_run()
7509 int max_degraded = (mddev->level == 6 ? 2 : 1); in raid5_run()
7514 pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", in raid5_run()
7516 return -EINVAL; in raid5_run()
7519 if (mddev->new_level != mddev->level) { in raid5_run()
7520 pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", in raid5_run()
7522 return -EINVAL; in raid5_run()
7524 old_disks = mddev->raid_disks - mddev->delta_disks; in raid5_run()
7525 /* reshape_position must be on a new-stripe boundary, and one in raid5_run()
7532 here_new = mddev->reshape_position; in raid5_run()
7533 chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors); in raid5_run()
7534 new_data_disks = mddev->raid_disks - max_degraded; in raid5_run()
7538 return -EINVAL; in raid5_run()
7542 here_old = mddev->reshape_position; in raid5_run()
7543 sector_div(here_old, chunk_sectors * (old_disks-max_degraded)); in raid5_run()
7546 if (mddev->delta_disks == 0) { in raid5_run()
7547 /* We cannot be sure it is safe to start an in-place in raid5_run()
7548 * reshape. It is only safe if user-space is monitoring in raid5_run()
7554 if (abs(min_offset_diff) >= mddev->chunk_sectors && in raid5_run()
7555 abs(min_offset_diff) >= mddev->new_chunk_sectors) in raid5_run()
7556 /* not really in-place - so OK */; in raid5_run()
7557 else if (mddev->ro == 0) { in raid5_run()
7558 pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", in raid5_run()
7560 return -EINVAL; in raid5_run()
7562 } else if (mddev->reshape_backwards in raid5_run()
7566 here_old * chunk_sectors + (-min_offset_diff))) { in raid5_run()
7567 /* Reading from the same stripe as writing to - bad */ in raid5_run()
7568 pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", in raid5_run()
7570 return -EINVAL; in raid5_run()
7575 BUG_ON(mddev->level != mddev->new_level); in raid5_run()
7576 BUG_ON(mddev->layout != mddev->new_layout); in raid5_run()
7577 BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); in raid5_run()
7578 BUG_ON(mddev->delta_disks != 0); in raid5_run()
7581 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && in raid5_run()
7582 test_bit(MD_HAS_PPL, &mddev->flags)) { in raid5_run()
7583 pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", in raid5_run()
7585 clear_bit(MD_HAS_PPL, &mddev->flags); in raid5_run()
7586 clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); in raid5_run()
7589 if (mddev->private == NULL) in raid5_run()
7590 conf = setup_conf(mddev); in raid5_run()
7592 conf = mddev->private; in raid5_run()
7594 if (IS_ERR(conf)) in raid5_run()
7595 return PTR_ERR(conf); in raid5_run()
7597 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { in raid5_run()
7601 mddev->ro = 1; in raid5_run()
7602 set_disk_ro(mddev->gendisk, 1); in raid5_run()
7603 } else if (mddev->recovery_cp == MaxSector) in raid5_run()
7604 set_bit(MD_JOURNAL_CLEAN, &mddev->flags); in raid5_run()
7607 conf->min_offset_diff = min_offset_diff; in raid5_run()
7608 mddev->thread = conf->thread; in raid5_run()
7609 conf->thread = NULL; in raid5_run()
7610 mddev->private = conf; in raid5_run()
7612 for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; in raid5_run()
7614 rdev = conf->disks[i].rdev; in raid5_run()
7615 if (!rdev && conf->disks[i].replacement) { in raid5_run()
7617 rdev = conf->disks[i].replacement; in raid5_run()
7618 conf->disks[i].replacement = NULL; in raid5_run()
7619 clear_bit(Replacement, &rdev->flags); in raid5_run()
7620 conf->disks[i].rdev = rdev; in raid5_run()
7624 if (conf->disks[i].replacement && in raid5_run()
7625 conf->reshape_progress != MaxSector) { in raid5_run()
7630 if (test_bit(In_sync, &rdev->flags)) { in raid5_run()
7634 /* This disk is not fully in-sync. However if it in raid5_run()
7643 if (mddev->major_version == 0 && in raid5_run()
7644 mddev->minor_version > 90) in raid5_run()
7645 rdev->recovery_offset = reshape_offset; in raid5_run()
7647 if (rdev->recovery_offset < reshape_offset) { in raid5_run()
7649 if (!only_parity(rdev->raid_disk, in raid5_run()
7650 conf->algorithm, in raid5_run()
7651 conf->raid_disks, in raid5_run()
7652 conf->max_degraded)) in raid5_run()
7655 if (!only_parity(rdev->raid_disk, in raid5_run()
7656 conf->prev_algo, in raid5_run()
7657 conf->previous_raid_disks, in raid5_run()
7658 conf->max_degraded)) in raid5_run()
7666 mddev->degraded = raid5_calc_degraded(conf); in raid5_run()
7668 if (has_failed(conf)) { in raid5_run()
7670 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_run()
7675 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); in raid5_run()
7676 mddev->resync_max_sectors = mddev->dev_sectors; in raid5_run()
7678 if (mddev->degraded > dirty_parity_disks && in raid5_run()
7679 mddev->recovery_cp != MaxSector) { in raid5_run()
7680 if (test_bit(MD_HAS_PPL, &mddev->flags)) in raid5_run()
7683 else if (mddev->ok_start_degraded) in raid5_run()
7684 pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n", in raid5_run()
7694 mdname(mddev), conf->level, in raid5_run()
7695 mddev->raid_disks-mddev->degraded, mddev->raid_disks, in raid5_run()
7696 mddev->new_layout); in raid5_run()
7698 print_raid5_conf(conf); in raid5_run()
7700 if (conf->reshape_progress != MaxSector) { in raid5_run()
7701 conf->reshape_safe = conf->reshape_progress; in raid5_run()
7702 atomic_set(&conf->reshape_stripes, 0); in raid5_run()
7703 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_run()
7704 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_run()
7705 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_run()
7706 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_run()
7707 mddev->sync_thread = md_register_thread(md_do_sync, mddev, in raid5_run()
7709 if (!mddev->sync_thread) in raid5_run()
7714 if (mddev->to_remove == &raid5_attrs_group) in raid5_run()
7715 mddev->to_remove = NULL; in raid5_run()
7716 else if (mddev->kobj.sd && in raid5_run()
7717 sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) in raid5_run()
7722 if (mddev->queue) { in raid5_run()
7724 /* read-ahead size must cover two whole stripes, which in raid5_run()
7728 int data_disks = conf->previous_raid_disks - conf->max_degraded; in raid5_run()
7730 ((mddev->chunk_sectors << 9) / PAGE_SIZE); in raid5_run()
7732 chunk_size = mddev->chunk_sectors << 9; in raid5_run()
7733 blk_queue_io_min(mddev->queue, chunk_size); in raid5_run()
7734 raid5_set_io_opt(conf); in raid5_run()
7735 mddev->queue->limits.raid_partial_stripes_expensive = 1; in raid5_run()
7743 while ((stripe-1) & stripe) in raid5_run()
7744 stripe = (stripe | (stripe-1)) + 1; in raid5_run()
7745 mddev->queue->limits.discard_alignment = stripe; in raid5_run()
7746 mddev->queue->limits.discard_granularity = stripe; in raid5_run()
7748 blk_queue_max_write_same_sectors(mddev->queue, 0); in raid5_run()
7749 blk_queue_max_write_zeroes_sectors(mddev->queue, 0); in raid5_run()
7752 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
7753 rdev->data_offset << 9); in raid5_run()
7754 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
7755 rdev->new_data_offset << 9); in raid5_run()
7774 mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && in raid5_run()
7775 mddev->queue->limits.discard_granularity >= stripe) in raid5_run()
7777 mddev->queue); in raid5_run()
7780 mddev->queue); in raid5_run()
7782 blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); in raid5_run()
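/*
 * Discard stays enabled only when every request can cover a whole
 * (power-of-two-rounded) stripe and the admin has opted in via the
 * devices_handle_discard_safely module parameter, i.e. the members are
 * trusted to return zeroes after a discard; otherwise a discard followed
 * by a partial-stripe write could compute parity against stale data.
 */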
7785 if (log_init(conf, journal_dev, raid5_has_ppl(conf))) in raid5_run()
7790 md_unregister_thread(&mddev->thread); in raid5_run()
7791 print_raid5_conf(conf); in raid5_run()
7792 free_conf(conf); in raid5_run()
7793 mddev->private = NULL; in raid5_run()
7795 return -EIO; in raid5_run()
7800 struct r5conf *conf = priv; in raid5_free() local
7802 free_conf(conf); in raid5_free()
7803 mddev->to_remove = &raid5_attrs_group; in raid5_free()
7808 struct r5conf *conf = mddev->private; in raid5_status() local
7811 seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, in raid5_status()
7812 conf->chunk_sectors / 2, mddev->layout); in raid5_status()
7813 seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); in raid5_status()
7815 for (i = 0; i < conf->raid_disks; i++) { in raid5_status()
7816 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_status()
7817 seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); in raid5_status()
7823 static void print_raid5_conf (struct r5conf *conf) in print_raid5_conf() argument
7828 pr_debug("RAID conf printout:\n"); in print_raid5_conf()
7829 if (!conf) { in print_raid5_conf()
7830 pr_debug("(conf==NULL)\n"); in print_raid5_conf()
7833 pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level, in print_raid5_conf()
7834 conf->raid_disks, in print_raid5_conf()
7835 conf->raid_disks - conf->mddev->degraded); in print_raid5_conf()
7837 for (i = 0; i < conf->raid_disks; i++) { in print_raid5_conf()
7839 tmp = conf->disks + i; in print_raid5_conf()
7840 if (tmp->rdev) in print_raid5_conf()
7842 i, !test_bit(Faulty, &tmp->rdev->flags), in print_raid5_conf()
7843 bdevname(tmp->rdev->bdev, b)); in print_raid5_conf()
7850 struct r5conf *conf = mddev->private; in raid5_spare_active() local
7855 for (i = 0; i < conf->raid_disks; i++) { in raid5_spare_active()
7856 tmp = conf->disks + i; in raid5_spare_active()
7857 if (tmp->replacement in raid5_spare_active()
7858 && tmp->replacement->recovery_offset == MaxSector in raid5_spare_active()
7859 && !test_bit(Faulty, &tmp->replacement->flags) in raid5_spare_active()
7860 && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { in raid5_spare_active()
7862 if (!tmp->rdev in raid5_spare_active()
7863 || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) in raid5_spare_active()
7865 if (tmp->rdev) { in raid5_spare_active()
7868 * and never re-added. in raid5_spare_active()
7870 set_bit(Faulty, &tmp->rdev->flags); in raid5_spare_active()
7872 tmp->rdev->sysfs_state); in raid5_spare_active()
7874 sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); in raid5_spare_active()
7875 } else if (tmp->rdev in raid5_spare_active()
7876 && tmp->rdev->recovery_offset == MaxSector in raid5_spare_active()
7877 && !test_bit(Faulty, &tmp->rdev->flags) in raid5_spare_active()
7878 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { in raid5_spare_active()
7880 sysfs_notify_dirent_safe(tmp->rdev->sysfs_state); in raid5_spare_active()
7883 spin_lock_irqsave(&conf->device_lock, flags); in raid5_spare_active()
7884 mddev->degraded = raid5_calc_degraded(conf); in raid5_spare_active()
7885 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_spare_active()
7886 print_raid5_conf(conf); in raid5_spare_active()
7892 struct r5conf *conf = mddev->private; in raid5_remove_disk() local
7894 int number = rdev->raid_disk; in raid5_remove_disk()
7896 struct disk_info *p = conf->disks + number; in raid5_remove_disk()
7898 print_raid5_conf(conf); in raid5_remove_disk()
7899 if (test_bit(Journal, &rdev->flags) && conf->log) { in raid5_remove_disk()
7906 if (atomic_read(&conf->active_stripes) || in raid5_remove_disk()
7907 atomic_read(&conf->r5c_cached_full_stripes) || in raid5_remove_disk()
7908 atomic_read(&conf->r5c_cached_partial_stripes)) { in raid5_remove_disk()
7909 return -EBUSY; in raid5_remove_disk()
7911 log_exit(conf); in raid5_remove_disk()
7914 if (rdev == p->rdev) in raid5_remove_disk()
7915 rdevp = &p->rdev; in raid5_remove_disk()
7916 else if (rdev == p->replacement) in raid5_remove_disk()
7917 rdevp = &p->replacement; in raid5_remove_disk()
7921 if (number >= conf->raid_disks && in raid5_remove_disk()
7922 conf->reshape_progress == MaxSector) in raid5_remove_disk()
7923 clear_bit(In_sync, &rdev->flags); in raid5_remove_disk()
7925 if (test_bit(In_sync, &rdev->flags) || in raid5_remove_disk()
7926 atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
7927 err = -EBUSY; in raid5_remove_disk()
7930 /* Only remove non-faulty devices if recovery in raid5_remove_disk()
7933 if (!test_bit(Faulty, &rdev->flags) && in raid5_remove_disk()
7934 mddev->recovery_disabled != conf->recovery_disabled && in raid5_remove_disk()
7935 !has_failed(conf) && in raid5_remove_disk()
7936 (!p->replacement || p->replacement == rdev) && in raid5_remove_disk()
7937 number < conf->raid_disks) { in raid5_remove_disk()
7938 err = -EBUSY; in raid5_remove_disk()
7942 if (!test_bit(RemoveSynchronized, &rdev->flags)) { in raid5_remove_disk()
7944 if (atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
7946 err = -EBUSY; in raid5_remove_disk()
7951 err = log_modify(conf, rdev, false); in raid5_remove_disk()
7955 if (p->replacement) { in raid5_remove_disk()
7957 p->rdev = p->replacement; in raid5_remove_disk()
7958 clear_bit(Replacement, &p->replacement->flags); in raid5_remove_disk()
7960 * but will never see both as missing - if they are careful in raid5_remove_disk()
7962 p->replacement = NULL; in raid5_remove_disk()
7965 err = log_modify(conf, p->rdev, true); in raid5_remove_disk()
7968 clear_bit(WantReplacement, &rdev->flags); in raid5_remove_disk()
7971 print_raid5_conf(conf); in raid5_remove_disk()
7977 struct r5conf *conf = mddev->private; in raid5_add_disk() local
7978 int ret, err = -EEXIST; in raid5_add_disk()
7982 int last = conf->raid_disks - 1; in raid5_add_disk()
7984 if (test_bit(Journal, &rdev->flags)) { in raid5_add_disk()
7985 if (conf->log) in raid5_add_disk()
7986 return -EBUSY; in raid5_add_disk()
7988 rdev->raid_disk = 0; in raid5_add_disk()
7993 ret = log_init(conf, rdev, false); in raid5_add_disk()
7997 ret = r5l_start(conf->log); in raid5_add_disk()
8003 if (mddev->recovery_disabled == conf->recovery_disabled) in raid5_add_disk()
8004 return -EBUSY; in raid5_add_disk()
8006 if (rdev->saved_raid_disk < 0 && has_failed(conf)) in raid5_add_disk()
8008 return -EINVAL; in raid5_add_disk()
8010 if (rdev->raid_disk >= 0) in raid5_add_disk()
8011 first = last = rdev->raid_disk; in raid5_add_disk()
8014 * find the disk ... but prefer rdev->saved_raid_disk in raid5_add_disk()
8017 if (rdev->saved_raid_disk >= 0 && in raid5_add_disk()
8018 rdev->saved_raid_disk >= first && in raid5_add_disk()
8019 rdev->saved_raid_disk <= last && in raid5_add_disk()
8020 conf->disks[rdev->saved_raid_disk].rdev == NULL) in raid5_add_disk()
8021 first = rdev->saved_raid_disk; in raid5_add_disk()
8024 p = conf->disks + disk; in raid5_add_disk()
8025 if (p->rdev == NULL) { in raid5_add_disk()
8026 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8027 rdev->raid_disk = disk; in raid5_add_disk()
8028 if (rdev->saved_raid_disk != disk) in raid5_add_disk()
8029 conf->fullsync = 1; in raid5_add_disk()
8030 rcu_assign_pointer(p->rdev, rdev); in raid5_add_disk()
8032 err = log_modify(conf, rdev, true); in raid5_add_disk()
8038 p = conf->disks + disk; in raid5_add_disk()
8039 if (test_bit(WantReplacement, &p->rdev->flags) && in raid5_add_disk()
8040 p->replacement == NULL) { in raid5_add_disk()
8041 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8042 set_bit(Replacement, &rdev->flags); in raid5_add_disk()
8043 rdev->raid_disk = disk; in raid5_add_disk()
8045 conf->fullsync = 1; in raid5_add_disk()
8046 rcu_assign_pointer(p->replacement, rdev); in raid5_add_disk()
8051 print_raid5_conf(conf); in raid5_add_disk()
8065 struct r5conf *conf = mddev->private; in raid5_resize() local
8067 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in raid5_resize()
8068 return -EINVAL; in raid5_resize()
8069 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_resize()
8070 newsize = raid5_size(mddev, sectors, mddev->raid_disks); in raid5_resize()
8071 if (mddev->external_size && in raid5_resize()
8072 mddev->array_sectors > newsize) in raid5_resize()
8073 return -EINVAL; in raid5_resize()
8074 if (mddev->bitmap) { in raid5_resize()
8075 int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); in raid5_resize()
8080 if (sectors > mddev->dev_sectors && in raid5_resize()
8081 mddev->recovery_cp > mddev->dev_sectors) { in raid5_resize()
8082 mddev->recovery_cp = mddev->dev_sectors; in raid5_resize()
8083 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); in raid5_resize()
8085 mddev->dev_sectors = sectors; in raid5_resize()
8086 mddev->resync_max_sectors = sectors; in raid5_resize()
8097 * If the chunk size is greater, user-space should request more in check_stripe_cache()
8100 struct r5conf *conf = mddev->private; in check_stripe_cache() local
8101 if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8102 > conf->min_nr_stripes || in check_stripe_cache()
8103 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8104 > conf->min_nr_stripes) { in check_stripe_cache()
8107 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) in check_stripe_cache()
8108 / RAID5_STRIPE_SIZE(conf))*4); in check_stripe_cache()
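/*
 * Rule of thumb enforced above: a reshape needs the stripe cache to hold
 * at least four chunks' worth of stripes for both the old and the new
 * chunk size, i.e. ((chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4
 * entries; otherwise check_stripe_cache() fails and the user must raise
 * stripe_cache_size first.
 */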
8116 struct r5conf *conf = mddev->private; in check_reshape() local
8118 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in check_reshape()
8119 return -EINVAL; in check_reshape()
8120 if (mddev->delta_disks == 0 && in check_reshape()
8121 mddev->new_layout == mddev->layout && in check_reshape()
8122 mddev->new_chunk_sectors == mddev->chunk_sectors) in check_reshape()
8124 if (has_failed(conf)) in check_reshape()
8125 return -EINVAL; in check_reshape()
8126 if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) { in check_reshape()
8133 if (mddev->level == 6) in check_reshape()
8135 if (mddev->raid_disks + mddev->delta_disks < min) in check_reshape()
8136 return -EINVAL; in check_reshape()
8140 return -ENOSPC; in check_reshape()
8142 if (mddev->new_chunk_sectors > mddev->chunk_sectors || in check_reshape()
8143 mddev->delta_disks > 0) in check_reshape()
8144 if (resize_chunks(conf, in check_reshape()
8145 conf->previous_raid_disks in check_reshape()
8146 + max(0, mddev->delta_disks), in check_reshape()
8147 max(mddev->new_chunk_sectors, in check_reshape()
8148 mddev->chunk_sectors) in check_reshape()
8150 return -ENOMEM; in check_reshape()
8152 if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size) in check_reshape()
8154 return resize_stripes(conf, (conf->previous_raid_disks in check_reshape()
8155 + mddev->delta_disks)); in check_reshape()
8160 struct r5conf *conf = mddev->private; in raid5_start_reshape() local
8165 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) in raid5_start_reshape()
8166 return -EBUSY; in raid5_start_reshape()
8169 return -ENOSPC; in raid5_start_reshape()
8171 if (has_failed(conf)) in raid5_start_reshape()
8172 return -EINVAL; in raid5_start_reshape()
8175 if (!test_bit(In_sync, &rdev->flags) in raid5_start_reshape()
8176 && !test_bit(Faulty, &rdev->flags)) in raid5_start_reshape()
8180 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) in raid5_start_reshape()
8184 return -EINVAL; in raid5_start_reshape()
8190 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) in raid5_start_reshape()
8191 < mddev->array_sectors) { in raid5_start_reshape()
8194 return -EINVAL; in raid5_start_reshape()
8197 atomic_set(&conf->reshape_stripes, 0); in raid5_start_reshape()
8198 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8199 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8200 conf->previous_raid_disks = conf->raid_disks; in raid5_start_reshape()
8201 conf->raid_disks += mddev->delta_disks; in raid5_start_reshape()
8202 conf->prev_chunk_sectors = conf->chunk_sectors; in raid5_start_reshape()
8203 conf->chunk_sectors = mddev->new_chunk_sectors; in raid5_start_reshape()
8204 conf->prev_algo = conf->algorithm; in raid5_start_reshape()
8205 conf->algorithm = mddev->new_layout; in raid5_start_reshape()
8206 conf->generation++; in raid5_start_reshape()
8208 * if reshape_progress has been set - so a memory barrier is needed. in raid5_start_reshape()
8211 if (mddev->reshape_backwards) in raid5_start_reshape()
8212 conf->reshape_progress = raid5_size(mddev, 0, 0); in raid5_start_reshape()
8214 conf->reshape_progress = 0; in raid5_start_reshape()
8215 conf->reshape_safe = conf->reshape_progress; in raid5_start_reshape()
8216 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8217 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8220 * the reshape wasn't running - like Discard or Read - has in raid5_start_reshape()
8233 if (mddev->delta_disks >= 0) { in raid5_start_reshape()
8235 if (rdev->raid_disk < 0 && in raid5_start_reshape()
8236 !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8238 if (rdev->raid_disk in raid5_start_reshape()
8239 >= conf->previous_raid_disks) in raid5_start_reshape()
8240 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8242 rdev->recovery_offset = 0; in raid5_start_reshape()
8247 } else if (rdev->raid_disk >= conf->previous_raid_disks in raid5_start_reshape()
8248 && !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8250 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8254 * ->degraded is measured against the larger of the in raid5_start_reshape()
8257 spin_lock_irqsave(&conf->device_lock, flags); in raid5_start_reshape()
8258 mddev->degraded = raid5_calc_degraded(conf); in raid5_start_reshape()
8259 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_start_reshape()
8261 mddev->raid_disks = conf->raid_disks; in raid5_start_reshape()
8262 mddev->reshape_position = conf->reshape_progress; in raid5_start_reshape()
8263 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_start_reshape()
8265 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_start_reshape()
8266 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_start_reshape()
8267 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); in raid5_start_reshape()
8268 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_start_reshape()
8269 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_start_reshape()
8270 mddev->sync_thread = md_register_thread(md_do_sync, mddev, in raid5_start_reshape()
8272 if (!mddev->sync_thread) { in raid5_start_reshape()
8273 mddev->recovery = 0; in raid5_start_reshape()
8274 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8275 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8276 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; in raid5_start_reshape()
8277 mddev->new_chunk_sectors = in raid5_start_reshape()
8278 conf->chunk_sectors = conf->prev_chunk_sectors; in raid5_start_reshape()
8279 mddev->new_layout = conf->algorithm = conf->prev_algo; in raid5_start_reshape()
8281 rdev->new_data_offset = rdev->data_offset; in raid5_start_reshape()
8283 conf->generation--; in raid5_start_reshape()
8284 conf->reshape_progress = MaxSector; in raid5_start_reshape()
8285 mddev->reshape_position = MaxSector; in raid5_start_reshape()
8286 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8287 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8288 return -EAGAIN; in raid5_start_reshape()
8290 conf->reshape_checkpoint = jiffies; in raid5_start_reshape()
8291 md_wakeup_thread(mddev->sync_thread); in raid5_start_reshape()
8297 * changes needed in 'conf'
8299 static void end_reshape(struct r5conf *conf) in end_reshape() argument
8302 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { in end_reshape()
8305 spin_lock_irq(&conf->device_lock); in end_reshape()
8306 conf->previous_raid_disks = conf->raid_disks; in end_reshape()
8307 md_finish_reshape(conf->mddev); in end_reshape()
8309 conf->reshape_progress = MaxSector; in end_reshape()
8310 conf->mddev->reshape_position = MaxSector; in end_reshape()
8311 rdev_for_each(rdev, conf->mddev) in end_reshape()
8312 if (rdev->raid_disk >= 0 && in end_reshape()
8313 !test_bit(Journal, &rdev->flags) && in end_reshape()
8314 !test_bit(In_sync, &rdev->flags)) in end_reshape()
8315 rdev->recovery_offset = MaxSector; in end_reshape()
8316 spin_unlock_irq(&conf->device_lock); in end_reshape()
8317 wake_up(&conf->wait_for_overlap); in end_reshape()
8319 if (conf->mddev->queue) in end_reshape()
8320 raid5_set_io_opt(conf); in end_reshape()
8329 struct r5conf *conf = mddev->private; in raid5_finish_reshape() local
8331 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { in raid5_finish_reshape()
8333 if (mddev->delta_disks <= 0) { in raid5_finish_reshape()
8335 spin_lock_irq(&conf->device_lock); in raid5_finish_reshape()
8336 mddev->degraded = raid5_calc_degraded(conf); in raid5_finish_reshape()
8337 spin_unlock_irq(&conf->device_lock); in raid5_finish_reshape()
8338 for (d = conf->raid_disks ; in raid5_finish_reshape()
8339 d < conf->raid_disks - mddev->delta_disks; in raid5_finish_reshape()
8341 struct md_rdev *rdev = conf->disks[d].rdev; in raid5_finish_reshape()
8343 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8344 rdev = conf->disks[d].replacement; in raid5_finish_reshape()
8346 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8349 mddev->layout = conf->algorithm; in raid5_finish_reshape()
8350 mddev->chunk_sectors = conf->chunk_sectors; in raid5_finish_reshape()
8351 mddev->reshape_position = MaxSector; in raid5_finish_reshape()
8352 mddev->delta_disks = 0; in raid5_finish_reshape()
8353 mddev->reshape_backwards = 0; in raid5_finish_reshape()
8359 struct r5conf *conf = mddev->private; in raid5_quiesce() local
8363 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8367 r5c_flush_cache(conf, INT_MAX); in raid5_quiesce()
8368 conf->quiesce = 2; in raid5_quiesce()
8369 wait_event_cmd(conf->wait_for_quiescent, in raid5_quiesce()
8370 atomic_read(&conf->active_stripes) == 0 && in raid5_quiesce()
8371 atomic_read(&conf->active_aligned_reads) == 0, in raid5_quiesce()
8372 unlock_all_device_hash_locks_irq(conf), in raid5_quiesce()
8373 lock_all_device_hash_locks_irq(conf)); in raid5_quiesce()
8374 conf->quiesce = 1; in raid5_quiesce()
8375 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8377 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8379 /* re-enable writes */ in raid5_quiesce()
8380 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8381 conf->quiesce = 0; in raid5_quiesce()
8382 wake_up(&conf->wait_for_quiescent); in raid5_quiesce()
8383 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8384 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8386 log_quiesce(conf, quiesce); in raid5_quiesce()
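/*
 * Quiesce protocol in brief: conf->quiesce is bumped to 2 while waiting
 * (after flushing the r5c write-back cache) for active stripes and
 * aligned reads to drain, then set to 1 so new stripe activity stays
 * blocked; setting it back to 0 re-enables writes and wakes the waiters.
 */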
8391 struct r0conf *raid0_conf = mddev->private; in raid45_takeover_raid0()
8395 if (raid0_conf->nr_strip_zones > 1) { in raid45_takeover_raid0()
8398 return ERR_PTR(-EINVAL); in raid45_takeover_raid0()
8401 sectors = raid0_conf->strip_zone[0].zone_end; in raid45_takeover_raid0()
8402 sector_div(sectors, raid0_conf->strip_zone[0].nb_dev); in raid45_takeover_raid0()
8403 mddev->dev_sectors = sectors; in raid45_takeover_raid0()
8404 mddev->new_level = level; in raid45_takeover_raid0()
8405 mddev->new_layout = ALGORITHM_PARITY_N; in raid45_takeover_raid0()
8406 mddev->new_chunk_sectors = mddev->chunk_sectors; in raid45_takeover_raid0()
8407 mddev->raid_disks += 1; in raid45_takeover_raid0()
8408 mddev->delta_disks = 1; in raid45_takeover_raid0()
8410 mddev->recovery_cp = MaxSector; in raid45_takeover_raid0()
8420 if (mddev->raid_disks != 2 || in raid5_takeover_raid1()
8421 mddev->degraded > 1) in raid5_takeover_raid1()
8422 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8424 /* Should check if there are write-behind devices? */ in raid5_takeover_raid1()
8429 while (chunksect && (mddev->array_sectors & (chunksect-1))) in raid5_takeover_raid1()
8432 if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private)) in raid5_takeover_raid1()
8434 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8436 mddev->new_level = 5; in raid5_takeover_raid1()
8437 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; in raid5_takeover_raid1()
8438 mddev->new_chunk_sectors = chunksect; in raid5_takeover_raid1()
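/*
 * RAID1 -> RAID5 takeover picks the largest power-of-two chunk (64 KiB
 * by default) that divides the array size; if even the stripe size does
 * not fit, the takeover is refused.  The result is a degenerate 2-device
 * RAID5 using the left-symmetric layout.
 */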
8451 switch (mddev->layout) { in raid5_takeover_raid6()
8471 return ERR_PTR(-EINVAL); in raid5_takeover_raid6()
8473 mddev->new_level = 5; in raid5_takeover_raid6()
8474 mddev->new_layout = new_layout; in raid5_takeover_raid6()
8475 mddev->delta_disks = -1; in raid5_takeover_raid6()
8476 mddev->raid_disks -= 1; in raid5_takeover_raid6()
8482 /* For a 2-drive array, the layout and chunk size can be changed in raid5_check_reshape()
8484 * For larger arrays we record the new value - after validation in raid5_check_reshape()
8487 struct r5conf *conf = mddev->private; in raid5_check_reshape() local
8488 int new_chunk = mddev->new_chunk_sectors; in raid5_check_reshape()
8490 if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) in raid5_check_reshape()
8491 return -EINVAL; in raid5_check_reshape()
8494 return -EINVAL; in raid5_check_reshape()
8496 return -EINVAL; in raid5_check_reshape()
8497 if (mddev->array_sectors & (new_chunk-1)) in raid5_check_reshape()
8499 return -EINVAL; in raid5_check_reshape()
8504 if (mddev->raid_disks == 2) { in raid5_check_reshape()
8506 if (mddev->new_layout >= 0) { in raid5_check_reshape()
8507 conf->algorithm = mddev->new_layout; in raid5_check_reshape()
8508 mddev->layout = mddev->new_layout; in raid5_check_reshape()
8511 conf->chunk_sectors = new_chunk; in raid5_check_reshape()
8512 mddev->chunk_sectors = new_chunk; in raid5_check_reshape()
8514 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_check_reshape()
8515 md_wakeup_thread(mddev->thread); in raid5_check_reshape()
8522 int new_chunk = mddev->new_chunk_sectors; in raid6_check_reshape()
8524 if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) in raid6_check_reshape()
8525 return -EINVAL; in raid6_check_reshape()
8528 return -EINVAL; in raid6_check_reshape()
8530 return -EINVAL; in raid6_check_reshape()
8531 if (mddev->array_sectors & (new_chunk-1)) in raid6_check_reshape()
8533 return -EINVAL; in raid6_check_reshape()
8543 * raid0 - if there is only one strip zone - make it a raid4 layout in raid5_takeover()
8544 * raid1 - if there are two drives. We need to know the chunk size in raid5_takeover()
8545 * raid4 - trivial - just use a raid4 layout. in raid5_takeover()
8546 * raid6 - Providing it is a *_6 layout in raid5_takeover()
8548 if (mddev->level == 0) in raid5_takeover()
8550 if (mddev->level == 1) in raid5_takeover()
8552 if (mddev->level == 4) { in raid5_takeover()
8553 mddev->new_layout = ALGORITHM_PARITY_N; in raid5_takeover()
8554 mddev->new_level = 5; in raid5_takeover()
8557 if (mddev->level == 6) in raid5_takeover()
8560 return ERR_PTR(-EINVAL); in raid5_takeover()
8566 * raid0 - if there is only one strip zone in raid4_takeover()
8567 * raid5 - if layout is right in raid4_takeover()
8569 if (mddev->level == 0) in raid4_takeover()
8571 if (mddev->level == 5 && in raid4_takeover()
8572 mddev->layout == ALGORITHM_PARITY_N) { in raid4_takeover()
8573 mddev->new_layout = 0; in raid4_takeover()
8574 mddev->new_level = 4; in raid4_takeover()
8577 return ERR_PTR(-EINVAL); in raid4_takeover()
8590 if (mddev->pers != &raid5_personality) in raid6_takeover()
8591 return ERR_PTR(-EINVAL); in raid6_takeover()
8592 if (mddev->degraded > 1) in raid6_takeover()
8593 return ERR_PTR(-EINVAL); in raid6_takeover()
8594 if (mddev->raid_disks > 253) in raid6_takeover()
8595 return ERR_PTR(-EINVAL); in raid6_takeover()
8596 if (mddev->raid_disks < 3) in raid6_takeover()
8597 return ERR_PTR(-EINVAL); in raid6_takeover()
8599 switch (mddev->layout) { in raid6_takeover()
8619 return ERR_PTR(-EINVAL); in raid6_takeover()
8621 mddev->new_level = 6; in raid6_takeover()
8622 mddev->new_layout = new_layout; in raid6_takeover()
8623 mddev->delta_disks = 1; in raid6_takeover()
8624 mddev->raid_disks += 1; in raid6_takeover()
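/*
 * RAID5 -> RAID6 takeover maps each RAID5 layout onto its *_6 twin
 * (the extra Q parity living on the appended last device) and adds one
 * disk, so the existing data does not need restriping at takeover time;
 * a later reshape can convert to a regular RAID6 layout.
 */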
8630 struct r5conf *conf; in raid5_change_consistency_policy() local
8636 conf = mddev->private; in raid5_change_consistency_policy()
8637 if (!conf) { in raid5_change_consistency_policy()
8639 return -ENODEV; in raid5_change_consistency_policy()
8644 if (!raid5_has_ppl(conf) && conf->level == 5) { in raid5_change_consistency_policy()
8645 err = log_init(conf, NULL, true); in raid5_change_consistency_policy()
8647 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
8649 log_exit(conf); in raid5_change_consistency_policy()
8652 err = -EINVAL; in raid5_change_consistency_policy()
8654 if (raid5_has_ppl(conf)) { in raid5_change_consistency_policy()
8656 log_exit(conf); in raid5_change_consistency_policy()
8658 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
8659 } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && in raid5_change_consistency_policy()
8660 r5l_log_disk_error(conf)) { in raid5_change_consistency_policy()
8665 if (test_bit(Journal, &rdev->flags)) { in raid5_change_consistency_policy()
8672 clear_bit(MD_HAS_JOURNAL, &mddev->flags); in raid5_change_consistency_policy()
8675 err = -EBUSY; in raid5_change_consistency_policy()
8677 err = -EINVAL; in raid5_change_consistency_policy()
8679 err = -EINVAL; in raid5_change_consistency_policy()
8692 struct r5conf *conf = mddev->private; in raid5_start() local
8694 return r5l_start(conf->log); in raid5_start()
8778 return -ENOMEM; in raid5_init()
8807 MODULE_ALIAS("md-personality-4"); /* RAID5 */
8808 MODULE_ALIAS("md-raid5");
8809 MODULE_ALIAS("md-raid4");
8810 MODULE_ALIAS("md-level-5");
8811 MODULE_ALIAS("md-level-4");
8812 MODULE_ALIAS("md-personality-8"); /* RAID6 */
8813 MODULE_ALIAS("md-raid6");
8814 MODULE_ALIAS("md-level-6");