Lines Matching +full:conf +full:- +full:tx

1 // SPDX-License-Identifier: GPL-2.0-or-later
8 * RAID-4/5/6 management functions.
9 * Thanks to Penguin Computing for making the RAID-6 development possible
22 * conf->seq_write is the number of the last batch successfully written.
23 * conf->seq_flush is the number of the last batch that was closed to
26 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
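The batch-sequence test further down (line 254, sh->bm_seq - conf->seq_write > 0) uses signed subtraction rather than a direct comparison so the ordering survives counter wraparound. A minimal userspace sketch of that idiom; the helper name seq_after is ours, not the kernel's:

#include <stdio.h>

/* Hypothetical helper: true when batch a is newer than batch b,
 * even after the counters wrap (two's-complement assumed). */
static int seq_after(unsigned int a, unsigned int b)
{
        return (int)(a - b) > 0;
}

int main(void)
{
        printf("%d\n", seq_after(5u, 3u));                   /* 1 */
        printf("%d\n", seq_after(3u, 5u));                   /* 0 */
        printf("%d\n", seq_after(0x80000001u, 0x7fffffffu)); /* 1: ordered across wrap */
        return 0;
}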
57 #include "md-bitmap.h"
58 #include "raid5-log.h"
73 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) in stripe_hash() argument
75 int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK; in stripe_hash()
76 return &conf->stripe_hashtbl[hash]; in stripe_hash()
79 static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect) in stripe_hash_locks_hash() argument
81 return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK; in stripe_hash_locks_hash()
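Both helpers shift the per-stripe sector count out of the sector number and mask the result. A toy computation with made-up constants (the real values come from RAID5_STRIPE_SHIFT(conf) and the kernel's HASH_MASK and STRIPE_HASH_LOCKS_MASK) shows how one sector selects both a hash bucket and the lock that guards it:

#include <stdio.h>

#define STRIPE_SHIFT           3     /* assumed: 8 sectors per 4 KiB stripe */
#define HASH_MASK              0xff  /* assumed: 256 hash buckets */
#define STRIPE_HASH_LOCKS_MASK 0x7   /* assumed: 8 hash locks */

int main(void)
{
        unsigned long long sect = 0x12345;          /* arbitrary sector */
        unsigned long stripe = sect >> STRIPE_SHIFT;

        printf("bucket %lu, lock %lu\n",
               stripe & HASH_MASK, stripe & STRIPE_HASH_LOCKS_MASK);
        return 0;
}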
84 static inline void lock_device_hash_lock(struct r5conf *conf, int hash) in lock_device_hash_lock() argument
85 __acquires(&conf->device_lock) in lock_device_hash_lock()
87 spin_lock_irq(conf->hash_locks + hash); in lock_device_hash_lock()
88 spin_lock(&conf->device_lock); in lock_device_hash_lock()
91 static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) in unlock_device_hash_lock() argument
92 __releases(&conf->device_lock) in unlock_device_hash_lock()
94 spin_unlock(&conf->device_lock); in unlock_device_hash_lock()
95 spin_unlock_irq(conf->hash_locks + hash); in unlock_device_hash_lock()
98 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) in lock_all_device_hash_locks_irq() argument
99 __acquires(&conf->device_lock) in lock_all_device_hash_locks_irq()
102 spin_lock_irq(conf->hash_locks); in lock_all_device_hash_locks_irq()
104 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); in lock_all_device_hash_locks_irq()
105 spin_lock(&conf->device_lock); in lock_all_device_hash_locks_irq()
108 static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) in unlock_all_device_hash_locks_irq() argument
109 __releases(&conf->device_lock) in unlock_all_device_hash_locks_irq()
112 spin_unlock(&conf->device_lock); in unlock_all_device_hash_locks_irq()
113 for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) in unlock_all_device_hash_locks_irq()
114 spin_unlock(conf->hash_locks + i); in unlock_all_device_hash_locks_irq()
115 spin_unlock_irq(conf->hash_locks); in unlock_all_device_hash_locks_irq()
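Taken together, these helpers encode a strict ordering: a hash lock always comes before conf->device_lock, and the take-all variant acquires hash locks in ascending index order and drops them in reverse. A userspace pthread sketch of the same discipline, with NR_HASH_LOCKS assumed to match NR_STRIPE_HASH_LOCKS:

#include <pthread.h>

#define NR_HASH_LOCKS 8                 /* assumed value */

/* Initialise all mutexes with pthread_mutex_init() before use. */
static pthread_mutex_t hash_locks[NR_HASH_LOCKS];
static pthread_mutex_t device_lock;

static void lock_all(void)
{
        for (int i = 0; i < NR_HASH_LOCKS; i++)     /* ascending order */
                pthread_mutex_lock(&hash_locks[i]);
        pthread_mutex_lock(&device_lock);           /* device_lock last */
}

static void unlock_all(void)
{
        pthread_mutex_unlock(&device_lock);
        for (int i = NR_HASH_LOCKS - 1; i >= 0; i--)
                pthread_mutex_unlock(&hash_locks[i]);
}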
121 if (sh->ddf_layout) in raid6_d0()
125 if (sh->qd_idx == sh->disks - 1) in raid6_d0()
128 return sh->qd_idx + 1; in raid6_d0()
138 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
139 * is raid_disks-1. This helper does that mapping.
146 if (sh->ddf_layout) in raid6_idx_to_slot()
148 if (idx == sh->pd_idx) in raid6_idx_to_slot()
150 if (idx == sh->qd_idx) in raid6_idx_to_slot()
152 if (!sh->ddf_layout) in raid6_idx_to_slot()
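The fragments above test P, Q and the DDF layout in turn; a compilable sketch of the non-DDF branch of the mapping the comment describes (argument names are ours, and the in-tree function also threads a counter for the DDF case):

/* Callers iterate disks starting at raid6_d0() with *count == 0. */
static int idx_to_slot(int idx, int pd_idx, int qd_idx,
                       int raid_disks, int *count)
{
        if (idx == pd_idx)
                return raid_disks - 2;  /* P: second-to-last slot */
        if (idx == qd_idx)
                return raid_disks - 1;  /* Q: last slot */
        return (*count)++;              /* data disks fill 0 .. raid_disks-3 */
}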
157 static void print_raid5_conf (struct r5conf *conf);
161 return sh->check_state || sh->reconstruct_state || in stripe_operations_active()
162 test_bit(STRIPE_BIOFILL_RUN, &sh->state) || in stripe_operations_active()
163 test_bit(STRIPE_COMPUTE_RUN, &sh->state); in stripe_operations_active()
168 return (test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) || in stripe_is_lowprio()
169 test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) && in stripe_is_lowprio()
170 !test_bit(STRIPE_R5C_CACHING, &sh->state); in stripe_is_lowprio()
174 __must_hold(&sh->raid_conf->device_lock) in raid5_wakeup_stripe_thread()
176 struct r5conf *conf = sh->raid_conf; in raid5_wakeup_stripe_thread() local
179 int i, cpu = sh->cpu; in raid5_wakeup_stripe_thread()
183 sh->cpu = cpu; in raid5_wakeup_stripe_thread()
186 if (list_empty(&sh->lru)) { in raid5_wakeup_stripe_thread()
188 group = conf->worker_groups + cpu_to_group(cpu); in raid5_wakeup_stripe_thread()
190 list_add_tail(&sh->lru, &group->loprio_list); in raid5_wakeup_stripe_thread()
192 list_add_tail(&sh->lru, &group->handle_list); in raid5_wakeup_stripe_thread()
193 group->stripes_cnt++; in raid5_wakeup_stripe_thread()
194 sh->group = group; in raid5_wakeup_stripe_thread()
197 if (conf->worker_cnt_per_group == 0) { in raid5_wakeup_stripe_thread()
198 md_wakeup_thread(conf->mddev->thread); in raid5_wakeup_stripe_thread()
202 group = conf->worker_groups + cpu_to_group(sh->cpu); in raid5_wakeup_stripe_thread()
204 group->workers[0].working = true; in raid5_wakeup_stripe_thread()
206 queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work); in raid5_wakeup_stripe_thread()
208 thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1; in raid5_wakeup_stripe_thread()
210 for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) { in raid5_wakeup_stripe_thread()
211 if (group->workers[i].working == false) { in raid5_wakeup_stripe_thread()
212 group->workers[i].working = true; in raid5_wakeup_stripe_thread()
213 queue_work_on(sh->cpu, raid5_wq, in raid5_wakeup_stripe_thread()
214 &group->workers[i].work); in raid5_wakeup_stripe_thread()
215 thread_cnt--; in raid5_wakeup_stripe_thread()
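Worker fan-out scales with the backlog: workers[0] is always kicked, and the loop above wakes one additional worker per MAX_STRIPE_BATCH queued stripes beyond the first batch. A standalone rendering of that arithmetic, with MAX_STRIPE_BATCH assumed to be 8:

#include <stdio.h>

#define MAX_STRIPE_BATCH 8              /* assumed value */

int main(void)
{
        for (int stripes = 1; stripes <= 33; stripes += 8) {
                int extra = stripes / MAX_STRIPE_BATCH - 1;

                if (extra < 0)          /* loop guard: thread_cnt > 0 */
                        extra = 0;
                printf("%2d queued stripes -> %d extra workers\n",
                       stripes, extra);
        }
        return 0;
}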
220 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, in do_release_stripe() argument
222 __must_hold(&conf->device_lock) in do_release_stripe()
227 BUG_ON(!list_empty(&sh->lru)); in do_release_stripe()
228 BUG_ON(atomic_read(&conf->active_stripes)==0); in do_release_stripe()
230 if (r5c_is_writeback(conf->log)) in do_release_stripe()
231 for (i = sh->disks; i--; ) in do_release_stripe()
232 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in do_release_stripe()
241 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || in do_release_stripe()
242 (conf->quiesce && r5c_is_writeback(conf->log) && in do_release_stripe()
243 !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { in do_release_stripe()
244 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in do_release_stripe()
246 set_bit(STRIPE_HANDLE, &sh->state); in do_release_stripe()
249 if (test_bit(STRIPE_HANDLE, &sh->state)) { in do_release_stripe()
250 if (test_bit(STRIPE_DELAYED, &sh->state) && in do_release_stripe()
251 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
252 list_add_tail(&sh->lru, &conf->delayed_list); in do_release_stripe()
253 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && in do_release_stripe()
254 sh->bm_seq - conf->seq_write > 0) in do_release_stripe()
255 list_add_tail(&sh->lru, &conf->bitmap_list); in do_release_stripe()
257 clear_bit(STRIPE_DELAYED, &sh->state); in do_release_stripe()
258 clear_bit(STRIPE_BIT_DELAY, &sh->state); in do_release_stripe()
259 if (conf->worker_cnt_per_group == 0) { in do_release_stripe()
261 list_add_tail(&sh->lru, in do_release_stripe()
262 &conf->loprio_list); in do_release_stripe()
264 list_add_tail(&sh->lru, in do_release_stripe()
265 &conf->handle_list); in do_release_stripe()
271 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
274 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in do_release_stripe()
275 if (atomic_dec_return(&conf->preread_active_stripes) in do_release_stripe()
277 md_wakeup_thread(conf->mddev->thread); in do_release_stripe()
278 atomic_dec(&conf->active_stripes); in do_release_stripe()
279 if (!test_bit(STRIPE_EXPANDING, &sh->state)) { in do_release_stripe()
280 if (!r5c_is_writeback(conf->log)) in do_release_stripe()
281 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
283 WARN_ON(test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)); in do_release_stripe()
285 list_add_tail(&sh->lru, temp_inactive_list); in do_release_stripe()
286 else if (injournal == conf->raid_disks - conf->max_degraded) { in do_release_stripe()
288 if (!test_and_set_bit(STRIPE_R5C_FULL_STRIPE, &sh->state)) in do_release_stripe()
289 atomic_inc(&conf->r5c_cached_full_stripes); in do_release_stripe()
290 if (test_and_clear_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state)) in do_release_stripe()
291 atomic_dec(&conf->r5c_cached_partial_stripes); in do_release_stripe()
292 list_add_tail(&sh->lru, &conf->r5c_full_stripe_list); in do_release_stripe()
293 r5c_check_cached_full_stripe(conf); in do_release_stripe()
300 list_add_tail(&sh->lru, &conf->r5c_partial_stripe_list); in do_release_stripe()
306 static void __release_stripe(struct r5conf *conf, struct stripe_head *sh, in __release_stripe() argument
308 __must_hold(&conf->device_lock) in __release_stripe()
310 if (atomic_dec_and_test(&sh->count)) in __release_stripe()
311 do_release_stripe(conf, sh, temp_inactive_list); in __release_stripe()
321 static void release_inactive_stripe_list(struct r5conf *conf, in release_inactive_stripe_list() argument
331 hash = NR_STRIPE_HASH_LOCKS - 1; in release_inactive_stripe_list()
335 struct list_head *list = &temp_inactive_list[size - 1]; in release_inactive_stripe_list()
342 spin_lock_irqsave(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
343 if (list_empty(conf->inactive_list + hash) && in release_inactive_stripe_list()
345 atomic_dec(&conf->empty_inactive_list_nr); in release_inactive_stripe_list()
346 list_splice_tail_init(list, conf->inactive_list + hash); in release_inactive_stripe_list()
348 spin_unlock_irqrestore(conf->hash_locks + hash, flags); in release_inactive_stripe_list()
350 size--; in release_inactive_stripe_list()
351 hash--; in release_inactive_stripe_list()
355 wake_up(&conf->wait_for_stripe); in release_inactive_stripe_list()
356 if (atomic_read(&conf->active_stripes) == 0) in release_inactive_stripe_list()
357 wake_up(&conf->wait_for_quiescent); in release_inactive_stripe_list()
358 if (conf->retry_read_aligned) in release_inactive_stripe_list()
359 md_wakeup_thread(conf->mddev->thread); in release_inactive_stripe_list()
363 static int release_stripe_list(struct r5conf *conf, in release_stripe_list() argument
365 __must_hold(&conf->device_lock) in release_stripe_list()
371 head = llist_del_all(&conf->released_stripes); in release_stripe_list()
378 clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state); in release_stripe_list()
384 hash = sh->hash_lock_index; in release_stripe_list()
385 __release_stripe(conf, sh, &temp_inactive_list[hash]); in release_stripe_list()
394 struct r5conf *conf = sh->raid_conf; in raid5_release_stripe() local
402 if (atomic_add_unless(&sh->count, -1, 1)) in raid5_release_stripe()
405 if (unlikely(!conf->mddev->thread) || in raid5_release_stripe()
406 test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state)) in raid5_release_stripe()
408 wakeup = llist_add(&sh->release_list, &conf->released_stripes); in raid5_release_stripe()
410 md_wakeup_thread(conf->mddev->thread); in raid5_release_stripe()
414 if (atomic_dec_and_lock_irqsave(&sh->count, &conf->device_lock, flags)) { in raid5_release_stripe()
416 hash = sh->hash_lock_index; in raid5_release_stripe()
417 do_release_stripe(conf, sh, &list); in raid5_release_stripe()
418 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_release_stripe()
419 release_inactive_stripe_list(conf, &list, hash); in raid5_release_stripe()
426 (unsigned long long)sh->sector); in remove_hash()
428 hlist_del_init(&sh->hash); in remove_hash()
431 static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) in insert_hash() argument
433 struct hlist_head *hp = stripe_hash(conf, sh->sector); in insert_hash()
436 (unsigned long long)sh->sector); in insert_hash()
438 hlist_add_head(&sh->hash, hp); in insert_hash()
442 static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) in get_free_stripe() argument
447 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
449 first = (conf->inactive_list + hash)->next; in get_free_stripe()
453 atomic_inc(&conf->active_stripes); in get_free_stripe()
454 BUG_ON(hash != sh->hash_lock_index); in get_free_stripe()
455 if (list_empty(conf->inactive_list + hash)) in get_free_stripe()
456 atomic_inc(&conf->empty_inactive_list_nr); in get_free_stripe()
468 if (!sh->pages) in free_stripe_pages()
471 for (i = 0; i < sh->nr_pages; i++) { in free_stripe_pages()
472 p = sh->pages[i]; in free_stripe_pages()
475 sh->pages[i] = NULL; in free_stripe_pages()
484 for (i = 0; i < sh->nr_pages; i++) { in alloc_stripe_pages()
486 if (sh->pages[i]) in alloc_stripe_pages()
492 return -ENOMEM; in alloc_stripe_pages()
494 sh->pages[i] = p; in alloc_stripe_pages()
500 init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) in init_stripe_shared_pages() argument
504 if (sh->pages) in init_stripe_shared_pages()
507 /* Each of the sh->dev[i] need one conf->stripe_size */ in init_stripe_shared_pages()
508 cnt = PAGE_SIZE / conf->stripe_size; in init_stripe_shared_pages()
509 nr_pages = (disks + cnt - 1) / cnt; in init_stripe_shared_pages()
511 sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); in init_stripe_shared_pages()
512 if (!sh->pages) in init_stripe_shared_pages()
513 return -ENOMEM; in init_stripe_shared_pages()
514 sh->nr_pages = nr_pages; in init_stripe_shared_pages()
515 sh->stripes_per_page = cnt; in init_stripe_shared_pages()
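The page-count arithmetic rounds up so every device buffer fits. A worked example with assumed sizes (4 KiB pages, a hypothetical 1 KiB conf->stripe_size, eight disks):

#include <stdio.h>

int main(void)
{
        unsigned int page_size = 4096, stripe_size = 1024, disks = 8;
        unsigned int cnt = page_size / stripe_size;      /* 4 buffers per page */
        unsigned int nr_pages = (disks + cnt - 1) / cnt; /* rounds up to 2 */

        printf("%u shared pages, %u stripes per page\n", nr_pages, cnt);
        return 0;
}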
523 int num = sh->raid_conf->pool_size; in shrink_buffers()
529 WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); in shrink_buffers()
530 p = sh->dev[i].page; in shrink_buffers()
533 sh->dev[i].page = NULL; in shrink_buffers()
538 sh->dev[i].page = NULL; in shrink_buffers()
546 int num = sh->raid_conf->pool_size; in grow_buffers()
555 sh->dev[i].page = page; in grow_buffers()
556 sh->dev[i].orig_page = page; in grow_buffers()
557 sh->dev[i].offset = 0; in grow_buffers()
561 return -ENOMEM; in grow_buffers()
564 sh->dev[i].page = raid5_get_dev_page(sh, i); in grow_buffers()
565 sh->dev[i].orig_page = sh->dev[i].page; in grow_buffers()
566 sh->dev[i].offset = raid5_get_page_offset(sh, i); in grow_buffers()
572 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous,
577 struct r5conf *conf = sh->raid_conf; in init_stripe() local
580 BUG_ON(atomic_read(&sh->count) != 0); in init_stripe()
581 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); in init_stripe()
583 BUG_ON(sh->batch_head); in init_stripe()
588 seq = read_seqcount_begin(&conf->gen_lock); in init_stripe()
589 sh->generation = conf->generation - previous; in init_stripe()
590 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; in init_stripe()
591 sh->sector = sector; in init_stripe()
592 stripe_set_idx(sector, conf, previous, sh); in init_stripe()
593 sh->state = 0; in init_stripe()
595 for (i = sh->disks; i--; ) { in init_stripe()
596 struct r5dev *dev = &sh->dev[i]; in init_stripe()
598 if (dev->toread || dev->read || dev->towrite || dev->written || in init_stripe()
599 test_bit(R5_LOCKED, &dev->flags)) { in init_stripe()
601 (unsigned long long)sh->sector, i, dev->toread, in init_stripe()
602 dev->read, dev->towrite, dev->written, in init_stripe()
603 test_bit(R5_LOCKED, &dev->flags)); in init_stripe()
606 dev->flags = 0; in init_stripe()
607 dev->sector = raid5_compute_blocknr(sh, i, previous); in init_stripe()
609 if (read_seqcount_retry(&conf->gen_lock, seq)) in init_stripe()
611 sh->overwrite_disks = 0; in init_stripe()
612 insert_hash(conf, sh); in init_stripe()
613 sh->cpu = smp_processor_id(); in init_stripe()
614 set_bit(STRIPE_BATCH_READY, &sh->state); in init_stripe()
617 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, in __find_stripe() argument
623 hlist_for_each_entry(sh, stripe_hash(conf, sector), hash) in __find_stripe()
624 if (sh->sector == sector && sh->generation == generation) in __find_stripe()
630 static struct stripe_head *find_get_stripe(struct r5conf *conf, in find_get_stripe() argument
636 sh = __find_stripe(conf, sector, generation); in find_get_stripe()
640 if (atomic_inc_not_zero(&sh->count)) in find_get_stripe()
645 * be on a list (sh->lru). Must remove the stripe from the list that in find_get_stripe()
649 spin_lock(&conf->device_lock); in find_get_stripe()
650 if (!atomic_read(&sh->count)) { in find_get_stripe()
651 if (!test_bit(STRIPE_HANDLE, &sh->state)) in find_get_stripe()
652 atomic_inc(&conf->active_stripes); in find_get_stripe()
653 BUG_ON(list_empty(&sh->lru) && in find_get_stripe()
654 !test_bit(STRIPE_EXPANDING, &sh->state)); in find_get_stripe()
656 if (!list_empty(conf->inactive_list + hash)) in find_get_stripe()
658 list_del_init(&sh->lru); in find_get_stripe()
659 if (list_empty(conf->inactive_list + hash) && in find_get_stripe()
661 atomic_inc(&conf->empty_inactive_list_nr); in find_get_stripe()
662 if (sh->group) { in find_get_stripe()
663 sh->group->stripes_cnt--; in find_get_stripe()
664 sh->group = NULL; in find_get_stripe()
667 atomic_inc(&sh->count); in find_get_stripe()
668 spin_unlock(&conf->device_lock); in find_get_stripe()
675 * - start an array
676 * - remove non-faulty devices
677 * - add a spare
678 * - allow a reshape
683 * of the two sections, and some non-in_sync devices may
686 * Most calls to this function hold &conf->device_lock. Calls
690 int raid5_calc_degraded(struct r5conf *conf) in raid5_calc_degraded() argument
697 for (i = 0; i < conf->previous_raid_disks; i++) { in raid5_calc_degraded()
698 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
699 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
700 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
701 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
703 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
706 /* not in-sync or faulty. in raid5_calc_degraded()
713 * be in-sync. in raid5_calc_degraded()
715 if (conf->raid_disks >= conf->previous_raid_disks) in raid5_calc_degraded()
719 if (conf->raid_disks == conf->previous_raid_disks) in raid5_calc_degraded()
723 for (i = 0; i < conf->raid_disks; i++) { in raid5_calc_degraded()
724 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_calc_degraded()
725 if (rdev && test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
726 rdev = rcu_dereference(conf->disks[i].replacement); in raid5_calc_degraded()
727 if (!rdev || test_bit(Faulty, &rdev->flags)) in raid5_calc_degraded()
729 else if (test_bit(In_sync, &rdev->flags)) in raid5_calc_degraded()
732 /* not in-sync or faulty. in raid5_calc_degraded()
737 if (conf->raid_disks <= conf->previous_raid_disks) in raid5_calc_degraded()
746 static bool has_failed(struct r5conf *conf) in has_failed() argument
748 int degraded = conf->mddev->degraded; in has_failed()
750 if (test_bit(MD_BROKEN, &conf->mddev->flags)) in has_failed()
753 if (conf->mddev->reshape_position != MaxSector) in has_failed()
754 degraded = raid5_calc_degraded(conf); in has_failed()
756 return degraded > conf->max_degraded; in has_failed()
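For example, a RAID6 array (max_degraded == 2) keeps running with two missing devices, but a third pushes degraded past max_degraded and has_failed() reports the array as failed.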
791 static bool is_inactive_blocked(struct r5conf *conf, int hash) in is_inactive_blocked() argument
793 if (list_empty(conf->inactive_list + hash)) in is_inactive_blocked()
796 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) in is_inactive_blocked()
799 return (atomic_read(&conf->active_stripes) < in is_inactive_blocked()
800 (conf->max_nr_stripes * 3 / 4)); in is_inactive_blocked()
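With, say, max_nr_stripes == 256, the R5_INACTIVE_BLOCKED stall lifts once fewer than 256 * 3 / 4 == 192 stripes are active.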
803 struct stripe_head *raid5_get_active_stripe(struct r5conf *conf, in raid5_get_active_stripe() argument
808 int hash = stripe_hash_locks_hash(conf, sector); in raid5_get_active_stripe()
813 spin_lock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
816 if (!(flags & R5_GAS_NOQUIESCE) && conf->quiesce) { in raid5_get_active_stripe()
824 if (ctx && ctx->batch_last) { in raid5_get_active_stripe()
825 raid5_release_stripe(ctx->batch_last); in raid5_get_active_stripe()
826 ctx->batch_last = NULL; in raid5_get_active_stripe()
829 wait_event_lock_irq(conf->wait_for_quiescent, in raid5_get_active_stripe()
830 !conf->quiesce, in raid5_get_active_stripe()
831 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
834 sh = find_get_stripe(conf, sector, conf->generation - previous, in raid5_get_active_stripe()
839 if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { in raid5_get_active_stripe()
840 sh = get_free_stripe(conf, hash); in raid5_get_active_stripe()
842 r5c_check_stripe_cache_usage(conf); in raid5_get_active_stripe()
844 atomic_inc(&sh->count); in raid5_get_active_stripe()
848 if (!test_bit(R5_DID_ALLOC, &conf->cache_state)) in raid5_get_active_stripe()
849 set_bit(R5_ALLOC_MORE, &conf->cache_state); in raid5_get_active_stripe()
855 set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); in raid5_get_active_stripe()
856 r5l_wake_reclaim(conf->log, 0); in raid5_get_active_stripe()
859 if (ctx && ctx->batch_last) { in raid5_get_active_stripe()
860 raid5_release_stripe(ctx->batch_last); in raid5_get_active_stripe()
861 ctx->batch_last = NULL; in raid5_get_active_stripe()
864 wait_event_lock_irq(conf->wait_for_stripe, in raid5_get_active_stripe()
865 is_inactive_blocked(conf, hash), in raid5_get_active_stripe()
866 *(conf->hash_locks + hash)); in raid5_get_active_stripe()
867 clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); in raid5_get_active_stripe()
870 spin_unlock_irq(conf->hash_locks + hash); in raid5_get_active_stripe()
876 BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded)); in is_full_stripe_write()
877 return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded); in is_full_stripe_write()
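On an 8-drive RAID6 (max_degraded == 2), for instance, a stripe counts as a full stripe write once bios cover all 8 - 2 = 6 data blocks, so parity can be computed without first reading old data.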
881 __acquires(&sh1->stripe_lock) in lock_two_stripes()
882 __acquires(&sh2->stripe_lock) in lock_two_stripes()
885 spin_lock_irq(&sh2->stripe_lock); in lock_two_stripes()
886 spin_lock_nested(&sh1->stripe_lock, 1); in lock_two_stripes()
888 spin_lock_irq(&sh1->stripe_lock); in lock_two_stripes()
889 spin_lock_nested(&sh2->stripe_lock, 1); in lock_two_stripes()
894 __releases(&sh1->stripe_lock) in unlock_two_stripes()
895 __releases(&sh2->stripe_lock) in unlock_two_stripes()
897 spin_unlock(&sh1->stripe_lock); in unlock_two_stripes()
898 spin_unlock_irq(&sh2->stripe_lock); in unlock_two_stripes()
904 struct r5conf *conf = sh->raid_conf; in stripe_can_batch() local
906 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in stripe_can_batch()
908 return test_bit(STRIPE_BATCH_READY, &sh->state) && in stripe_can_batch()
909 !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && in stripe_can_batch()
914 static void stripe_add_to_batch_list(struct r5conf *conf, in stripe_add_to_batch_list() argument
923 tmp_sec = sh->sector; in stripe_add_to_batch_list()
924 if (!sector_div(tmp_sec, conf->chunk_sectors)) in stripe_add_to_batch_list()
926 head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf); in stripe_add_to_batch_list()
928 if (last_sh && head_sector == last_sh->sector) { in stripe_add_to_batch_list()
930 atomic_inc(&head->count); in stripe_add_to_batch_list()
932 hash = stripe_hash_locks_hash(conf, head_sector); in stripe_add_to_batch_list()
933 spin_lock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
934 head = find_get_stripe(conf, head_sector, conf->generation, in stripe_add_to_batch_list()
936 spin_unlock_irq(conf->hash_locks + hash); in stripe_add_to_batch_list()
948 if (sh->batch_head) in stripe_add_to_batch_list()
952 while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in stripe_add_to_batch_list()
954 if (head->dev[dd_idx].towrite->bi_opf != sh->dev[dd_idx].towrite->bi_opf || in stripe_add_to_batch_list()
955 bio_op(head->dev[dd_idx].towrite) != bio_op(sh->dev[dd_idx].towrite)) in stripe_add_to_batch_list()
958 if (head->batch_head) { in stripe_add_to_batch_list()
959 spin_lock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
962 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
969 * this stripe->batch_head doesn't get assigned, which in stripe_add_to_batch_list()
972 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
978 list_add(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
979 spin_unlock(&head->batch_head->batch_lock); in stripe_add_to_batch_list()
981 head->batch_head = head; in stripe_add_to_batch_list()
982 sh->batch_head = head->batch_head; in stripe_add_to_batch_list()
983 spin_lock(&head->batch_lock); in stripe_add_to_batch_list()
984 list_add_tail(&sh->batch_list, &head->batch_list); in stripe_add_to_batch_list()
985 spin_unlock(&head->batch_lock); in stripe_add_to_batch_list()
988 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in stripe_add_to_batch_list()
989 if (atomic_dec_return(&conf->preread_active_stripes) in stripe_add_to_batch_list()
991 md_wakeup_thread(conf->mddev->thread); in stripe_add_to_batch_list()
993 if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { in stripe_add_to_batch_list()
994 int seq = sh->bm_seq; in stripe_add_to_batch_list()
995 if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && in stripe_add_to_batch_list()
996 sh->batch_head->bm_seq > seq) in stripe_add_to_batch_list()
997 seq = sh->batch_head->bm_seq; in stripe_add_to_batch_list()
998 set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); in stripe_add_to_batch_list()
999 sh->batch_head->bm_seq = seq; in stripe_add_to_batch_list()
1002 atomic_inc(&sh->count); in stripe_add_to_batch_list()
1012 static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) in use_new_offset() argument
1014 sector_t progress = conf->reshape_progress; in use_new_offset()
1016 * of conf->generation, or ->data_offset that was set before in use_new_offset()
1022 if (sh->generation == conf->generation - 1) in use_new_offset()
1024 /* We are in a reshape, and this is a new-generation stripe, in use_new_offset()
1045 if (da->sector > db->sector) in cmp_stripe()
1047 if (da->sector < db->sector) in cmp_stripe()
1048 return -1; in cmp_stripe()
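list_sort() expects a three-way comparator, and ordering the deferred stripes by ascending sector keeps dispatch close to sequential. The same shape written out as a helper (name ours):

static int cmp_sector(unsigned long long a, unsigned long long b)
{
        return (a > b) - (a < b);       /* 1, 0 or -1: ascending order */
}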
1052 static void dispatch_defer_bios(struct r5conf *conf, int target, in dispatch_defer_bios() argument
1059 if (conf->pending_data_cnt == 0) in dispatch_defer_bios()
1062 list_sort(NULL, &conf->pending_list, cmp_stripe); in dispatch_defer_bios()
1064 first = conf->pending_list.next; in dispatch_defer_bios()
1067 if (conf->next_pending_data) in dispatch_defer_bios()
1068 list_move_tail(&conf->pending_list, in dispatch_defer_bios()
1069 &conf->next_pending_data->sibling); in dispatch_defer_bios()
1071 while (!list_empty(&conf->pending_list)) { in dispatch_defer_bios()
1072 data = list_first_entry(&conf->pending_list, in dispatch_defer_bios()
1074 if (&data->sibling == first) in dispatch_defer_bios()
1075 first = data->sibling.next; in dispatch_defer_bios()
1076 next = data->sibling.next; in dispatch_defer_bios()
1078 bio_list_merge(list, &data->bios); in dispatch_defer_bios()
1079 list_move(&data->sibling, &conf->free_list); in dispatch_defer_bios()
1084 conf->pending_data_cnt -= cnt; in dispatch_defer_bios()
1085 BUG_ON(conf->pending_data_cnt < 0 || cnt < target); in dispatch_defer_bios()
1087 if (next != &conf->pending_list) in dispatch_defer_bios()
1088 conf->next_pending_data = list_entry(next, in dispatch_defer_bios()
1091 conf->next_pending_data = NULL; in dispatch_defer_bios()
1093 if (first != &conf->pending_list) in dispatch_defer_bios()
1094 list_move_tail(&conf->pending_list, first); in dispatch_defer_bios()
1097 static void flush_deferred_bios(struct r5conf *conf) in flush_deferred_bios() argument
1101 if (conf->pending_data_cnt == 0) in flush_deferred_bios()
1104 spin_lock(&conf->pending_bios_lock); in flush_deferred_bios()
1105 dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp); in flush_deferred_bios()
1106 BUG_ON(conf->pending_data_cnt != 0); in flush_deferred_bios()
1107 spin_unlock(&conf->pending_bios_lock); in flush_deferred_bios()
1112 static void defer_issue_bios(struct r5conf *conf, sector_t sector, in defer_issue_bios() argument
1118 spin_lock(&conf->pending_bios_lock); in defer_issue_bios()
1119 ent = list_first_entry(&conf->free_list, struct r5pending_data, in defer_issue_bios()
1121 list_move_tail(&ent->sibling, &conf->pending_list); in defer_issue_bios()
1122 ent->sector = sector; in defer_issue_bios()
1123 bio_list_init(&ent->bios); in defer_issue_bios()
1124 bio_list_merge(&ent->bios, bios); in defer_issue_bios()
1125 conf->pending_data_cnt++; in defer_issue_bios()
1126 if (conf->pending_data_cnt >= PENDING_IO_MAX) in defer_issue_bios()
1127 dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp); in defer_issue_bios()
1129 spin_unlock(&conf->pending_bios_lock); in defer_issue_bios()
1141 struct r5conf *conf = sh->raid_conf; in ops_run_io() local
1142 int i, disks = sh->disks; in ops_run_io()
1153 should_defer = conf->batch_bio_dispatch && conf->group_cnt; in ops_run_io()
1155 for (i = disks; i--; ) { in ops_run_io()
1163 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { in ops_run_io()
1165 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) in ops_run_io()
1167 if (test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_io()
1169 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) in ops_run_io()
1172 &sh->dev[i].flags)) { in ops_run_io()
1177 if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags)) in ops_run_io()
1181 dev = &sh->dev[i]; in ops_run_io()
1182 bi = &dev->req; in ops_run_io()
1183 rbi = &dev->rreq; /* For writing to replacement */ in ops_run_io()
1186 rrdev = rcu_dereference(conf->disks[i].replacement); in ops_run_io()
1188 rdev = rcu_dereference(conf->disks[i].rdev); in ops_run_io()
1200 if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev) in ops_run_io()
1205 if (rdev && test_bit(Faulty, &rdev->flags)) in ops_run_io()
1208 atomic_inc(&rdev->nr_pending); in ops_run_io()
1209 if (rrdev && test_bit(Faulty, &rrdev->flags)) in ops_run_io()
1212 atomic_inc(&rrdev->nr_pending); in ops_run_io()
1220 test_bit(WriteErrorSeen, &rdev->flags)) { in ops_run_io()
1223 int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in ops_run_io()
1229 set_bit(BlockedBadBlocks, &rdev->flags); in ops_run_io()
1230 if (!conf->mddev->external && in ops_run_io()
1231 conf->mddev->sb_flags) { in ops_run_io()
1234 * bad block log - better give it in ops_run_io()
1236 md_check_recovery(conf->mddev); in ops_run_io()
1243 atomic_inc(&rdev->nr_pending); in ops_run_io()
1244 md_wait_for_blocked_rdev(rdev, conf->mddev); in ops_run_io()
1246 /* Acknowledged bad block - skip the write */ in ops_run_io()
1247 rdev_dec_pending(rdev, conf->mddev); in ops_run_io()
1253 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1254 || s->replacing) in ops_run_io()
1255 md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1257 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1259 bio_init(bi, rdev->bdev, &dev->vec, 1, op | op_flags); in ops_run_io()
1260 bi->bi_end_io = op_is_write(op) in ops_run_io()
1263 bi->bi_private = sh; in ops_run_io()
1266 __func__, (unsigned long long)sh->sector, in ops_run_io()
1267 bi->bi_opf, i); in ops_run_io()
1268 atomic_inc(&sh->count); in ops_run_io()
1270 atomic_inc(&head_sh->count); in ops_run_io()
1271 if (use_new_offset(conf, sh)) in ops_run_io()
1272 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1273 + rdev->new_data_offset); in ops_run_io()
1275 bi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1276 + rdev->data_offset); in ops_run_io()
1277 if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags)) in ops_run_io()
1278 bi->bi_opf |= REQ_NOMERGE; in ops_run_io()
1280 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1281 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1284 test_bit(R5_InJournal, &sh->dev[i].flags)) in ops_run_io()
1290 sh->dev[i].vec.bv_page = sh->dev[i].orig_page; in ops_run_io()
1292 sh->dev[i].vec.bv_page = sh->dev[i].page; in ops_run_io()
1293 bi->bi_vcnt = 1; in ops_run_io()
1294 bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1295 bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1296 bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1302 bi->bi_vcnt = 0; in ops_run_io()
1304 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); in ops_run_io()
1306 if (conf->mddev->gendisk) in ops_run_io()
1308 disk_devt(conf->mddev->gendisk), in ops_run_io()
1309 sh->dev[i].sector); in ops_run_io()
1316 if (s->syncing || s->expanding || s->expanded in ops_run_io()
1317 || s->replacing) in ops_run_io()
1318 md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); in ops_run_io()
1320 set_bit(STRIPE_IO_STARTED, &sh->state); in ops_run_io()
1322 bio_init(rbi, rrdev->bdev, &dev->rvec, 1, op | op_flags); in ops_run_io()
1324 rbi->bi_end_io = raid5_end_write_request; in ops_run_io()
1325 rbi->bi_private = sh; in ops_run_io()
1329 __func__, (unsigned long long)sh->sector, in ops_run_io()
1330 rbi->bi_opf, i); in ops_run_io()
1331 atomic_inc(&sh->count); in ops_run_io()
1333 atomic_inc(&head_sh->count); in ops_run_io()
1334 if (use_new_offset(conf, sh)) in ops_run_io()
1335 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1336 + rrdev->new_data_offset); in ops_run_io()
1338 rbi->bi_iter.bi_sector = (sh->sector in ops_run_io()
1339 + rrdev->data_offset); in ops_run_io()
1340 if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) in ops_run_io()
1341 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in ops_run_io()
1342 sh->dev[i].rvec.bv_page = sh->dev[i].page; in ops_run_io()
1343 rbi->bi_vcnt = 1; in ops_run_io()
1344 rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1345 rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; in ops_run_io()
1346 rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); in ops_run_io()
1352 rbi->bi_vcnt = 0; in ops_run_io()
1353 if (conf->mddev->gendisk) in ops_run_io()
1355 disk_devt(conf->mddev->gendisk), in ops_run_io()
1356 sh->dev[i].sector); in ops_run_io()
1364 set_bit(STRIPE_DEGRADED, &sh->state); in ops_run_io()
1366 bi->bi_opf, i, (unsigned long long)sh->sector); in ops_run_io()
1367 clear_bit(R5_LOCKED, &sh->dev[i].flags); in ops_run_io()
1368 set_bit(STRIPE_HANDLE, &sh->state); in ops_run_io()
1371 if (!head_sh->batch_head) in ops_run_io()
1373 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_io()
1380 defer_issue_bios(conf, head_sh->sector, &pending_bios); in ops_run_io()
1385 unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, in async_copy_data() argument
1394 struct r5conf *conf = sh->raid_conf; in async_copy_data() local
1396 if (bio->bi_iter.bi_sector >= sector) in async_copy_data()
1397 page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512; in async_copy_data()
1399 page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512; in async_copy_data()
1403 init_async_submit(&submit, flags, tx, NULL, NULL, NULL); in async_copy_data()
1411 b_offset = -page_offset; in async_copy_data()
1413 len -= b_offset; in async_copy_data()
1416 if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf)) in async_copy_data()
1417 clen = RAID5_STRIPE_SIZE(conf) - page_offset; in async_copy_data()
1425 if (conf->skip_copy && in async_copy_data()
1427 clen == RAID5_STRIPE_SIZE(conf) && in async_copy_data()
1431 tx = async_memcpy(*page, bio_page, page_offset + poff, in async_copy_data()
1434 tx = async_memcpy(bio_page, *page, b_offset, in async_copy_data()
1438 submit.depend_tx = tx; in async_copy_data()
1445 return tx; in async_copy_data()
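The page_offset computed near the top of this function is a signed byte distance between the bio's start and the stripe chunk, at 512 bytes per sector; when it is negative, b_offset = -page_offset bytes of the bio are skipped before copying begins. A standalone rendering of that arithmetic (helper name ours):

static int page_offset_of(unsigned long long bio_sector,
                          unsigned long long dev_sector)
{
        if (bio_sector >= dev_sector)
                return (int)(bio_sector - dev_sector) * 512;
        return (int)(dev_sector - bio_sector) * -512;
}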
1452 struct r5conf *conf = sh->raid_conf; in ops_complete_biofill() local
1455 (unsigned long long)sh->sector); in ops_complete_biofill()
1458 for (i = sh->disks; i--; ) { in ops_complete_biofill()
1459 struct r5dev *dev = &sh->dev[i]; in ops_complete_biofill()
1466 if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { in ops_complete_biofill()
1469 BUG_ON(!dev->read); in ops_complete_biofill()
1470 rbi = dev->read; in ops_complete_biofill()
1471 dev->read = NULL; in ops_complete_biofill()
1472 while (rbi && rbi->bi_iter.bi_sector < in ops_complete_biofill()
1473 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_complete_biofill()
1474 rbi2 = r5_next_bio(conf, rbi, dev->sector); in ops_complete_biofill()
1480 clear_bit(STRIPE_BIOFILL_RUN, &sh->state); in ops_complete_biofill()
1482 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_biofill()
1488 struct dma_async_tx_descriptor *tx = NULL; in ops_run_biofill() local
1491 struct r5conf *conf = sh->raid_conf; in ops_run_biofill() local
1493 BUG_ON(sh->batch_head); in ops_run_biofill()
1495 (unsigned long long)sh->sector); in ops_run_biofill()
1497 for (i = sh->disks; i--; ) { in ops_run_biofill()
1498 struct r5dev *dev = &sh->dev[i]; in ops_run_biofill()
1499 if (test_bit(R5_Wantfill, &dev->flags)) { in ops_run_biofill()
1501 spin_lock_irq(&sh->stripe_lock); in ops_run_biofill()
1502 dev->read = rbi = dev->toread; in ops_run_biofill()
1503 dev->toread = NULL; in ops_run_biofill()
1504 spin_unlock_irq(&sh->stripe_lock); in ops_run_biofill()
1505 while (rbi && rbi->bi_iter.bi_sector < in ops_run_biofill()
1506 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biofill()
1507 tx = async_copy_data(0, rbi, &dev->page, in ops_run_biofill()
1508 dev->offset, in ops_run_biofill()
1509 dev->sector, tx, sh, 0); in ops_run_biofill()
1510 rbi = r5_next_bio(conf, rbi, dev->sector); in ops_run_biofill()
1515 atomic_inc(&sh->count); in ops_run_biofill()
1516 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); in ops_run_biofill()
1527 tgt = &sh->dev[target]; in mark_target_uptodate()
1528 set_bit(R5_UPTODATE, &tgt->flags); in mark_target_uptodate()
1529 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in mark_target_uptodate()
1530 clear_bit(R5_Wantcompute, &tgt->flags); in mark_target_uptodate()
1538 (unsigned long long)sh->sector); in ops_complete_compute()
1541 mark_target_uptodate(sh, sh->ops.target); in ops_complete_compute()
1542 mark_target_uptodate(sh, sh->ops.target2); in ops_complete_compute()
1544 clear_bit(STRIPE_COMPUTE_RUN, &sh->state); in ops_complete_compute()
1545 if (sh->check_state == check_state_compute_run) in ops_complete_compute()
1546 sh->check_state = check_state_compute_result; in ops_complete_compute()
1547 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_compute()
1554 return percpu->scribble + i * percpu->scribble_obj_size; in to_addr_page()
1561 return (void *) (to_addr_page(percpu, i) + sh->disks + 2); in to_addr_conv()
1570 return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); in to_addr_offs()
1576 int disks = sh->disks; in ops_run_compute5()
1579 int target = sh->ops.target; in ops_run_compute5()
1580 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute5()
1581 struct page *xor_dest = tgt->page; in ops_run_compute5()
1582 unsigned int off_dest = tgt->offset; in ops_run_compute5()
1584 struct dma_async_tx_descriptor *tx; in ops_run_compute5() local
1588 BUG_ON(sh->batch_head); in ops_run_compute5()
1591 __func__, (unsigned long long)sh->sector, target); in ops_run_compute5()
1592 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute5()
1594 for (i = disks; i--; ) { in ops_run_compute5()
1596 off_srcs[count] = sh->dev[i].offset; in ops_run_compute5()
1597 xor_srcs[count++] = sh->dev[i].page; in ops_run_compute5()
1601 atomic_inc(&sh->count); in ops_run_compute5()
1606 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_compute5()
1607 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1609 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_compute5()
1610 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute5()
1612 return tx; in ops_run_compute5()
1615 /* set_syndrome_sources - populate source buffers for gen_syndrome
1616 * @srcs - (struct page *) array of size sh->disks
1617 * @offs - (unsigned int) array of offset for each page
1618 * @sh - stripe_head to parse
1630 int disks = sh->disks; in set_syndrome_sources()
1631 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); in set_syndrome_sources()
1643 struct r5dev *dev = &sh->dev[i]; in set_syndrome_sources()
1645 if (i == sh->qd_idx || i == sh->pd_idx || in set_syndrome_sources()
1648 (test_bit(R5_Wantdrain, &dev->flags) || in set_syndrome_sources()
1649 test_bit(R5_InJournal, &dev->flags))) || in set_syndrome_sources()
1651 (dev->written || in set_syndrome_sources()
1652 test_bit(R5_InJournal, &dev->flags)))) { in set_syndrome_sources()
1653 if (test_bit(R5_InJournal, &dev->flags)) in set_syndrome_sources()
1654 srcs[slot] = sh->dev[i].orig_page; in set_syndrome_sources()
1656 srcs[slot] = sh->dev[i].page; in set_syndrome_sources()
1662 offs[slot] = sh->dev[i].offset; in set_syndrome_sources()
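With six drives and the non-DDF layout, for example, syndrome_disks is 4, so the four data pages land in slots 0..3 and P and Q in slots 4 and 5, exactly the ordering async_gen_syndrome() expects.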
1673 int disks = sh->disks; in ops_run_compute6_1()
1677 int qd_idx = sh->qd_idx; in ops_run_compute6_1()
1678 struct dma_async_tx_descriptor *tx; in ops_run_compute6_1() local
1686 BUG_ON(sh->batch_head); in ops_run_compute6_1()
1687 if (sh->ops.target < 0) in ops_run_compute6_1()
1688 target = sh->ops.target2; in ops_run_compute6_1()
1689 else if (sh->ops.target2 < 0) in ops_run_compute6_1()
1690 target = sh->ops.target; in ops_run_compute6_1()
1696 __func__, (unsigned long long)sh->sector, target); in ops_run_compute6_1()
1698 tgt = &sh->dev[target]; in ops_run_compute6_1()
1699 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_1()
1700 dest = tgt->page; in ops_run_compute6_1()
1701 dest_off = tgt->offset; in ops_run_compute6_1()
1703 atomic_inc(&sh->count); in ops_run_compute6_1()
1712 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_compute6_1()
1713 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1715 /* Compute any data- or p-drive using XOR */ in ops_run_compute6_1()
1717 for (i = disks; i-- ; ) { in ops_run_compute6_1()
1720 offs[count] = sh->dev[i].offset; in ops_run_compute6_1()
1721 blocks[count++] = sh->dev[i].page; in ops_run_compute6_1()
1727 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_1()
1728 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_compute6_1()
1731 return tx; in ops_run_compute6_1()
1737 int i, count, disks = sh->disks; in ops_run_compute6_2()
1738 int syndrome_disks = sh->ddf_layout ? disks : disks-2; in ops_run_compute6_2()
1740 int faila = -1, failb = -1; in ops_run_compute6_2()
1741 int target = sh->ops.target; in ops_run_compute6_2()
1742 int target2 = sh->ops.target2; in ops_run_compute6_2()
1743 struct r5dev *tgt = &sh->dev[target]; in ops_run_compute6_2()
1744 struct r5dev *tgt2 = &sh->dev[target2]; in ops_run_compute6_2()
1745 struct dma_async_tx_descriptor *tx; in ops_run_compute6_2() local
1750 BUG_ON(sh->batch_head); in ops_run_compute6_2()
1752 __func__, (unsigned long long)sh->sector, target, target2); in ops_run_compute6_2()
1754 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); in ops_run_compute6_2()
1755 BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); in ops_run_compute6_2()
1757 /* we need to open-code set_syndrome_sources to handle the in ops_run_compute6_2()
1769 offs[slot] = sh->dev[i].offset; in ops_run_compute6_2()
1770 blocks[slot] = sh->dev[i].page; in ops_run_compute6_2()
1783 __func__, (unsigned long long)sh->sector, faila, failb); in ops_run_compute6_2()
1785 atomic_inc(&sh->count); in ops_run_compute6_2()
1795 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1801 int qd_idx = sh->qd_idx; in ops_run_compute6_2()
1810 for (i = disks; i-- ; ) { in ops_run_compute6_2()
1813 offs[count] = sh->dev[i].offset; in ops_run_compute6_2()
1814 blocks[count++] = sh->dev[i].page; in ops_run_compute6_2()
1816 dest = sh->dev[data_target].page; in ops_run_compute6_2()
1817 dest_off = sh->dev[data_target].offset; in ops_run_compute6_2()
1822 tx = async_xor_offs(dest, dest_off, blocks, offs, count, in ops_run_compute6_2()
1823 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1827 init_async_submit(&submit, ASYNC_TX_FENCE, tx, in ops_run_compute6_2()
1831 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1841 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1847 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_compute6_2()
1859 (unsigned long long)sh->sector); in ops_complete_prexor()
1861 if (r5c_is_writeback(sh->raid_conf->log)) in ops_complete_prexor()
1863 * raid5-cache write back uses orig_page during prexor. in ops_complete_prexor()
1871 struct dma_async_tx_descriptor *tx) in ops_run_prexor5() argument
1873 int disks = sh->disks; in ops_run_prexor5()
1876 int count = 0, pd_idx = sh->pd_idx, i; in ops_run_prexor5()
1880 unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_prexor5()
1881 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_prexor5()
1883 BUG_ON(sh->batch_head); in ops_run_prexor5()
1885 (unsigned long long)sh->sector); in ops_run_prexor5()
1887 for (i = disks; i--; ) { in ops_run_prexor5()
1888 struct r5dev *dev = &sh->dev[i]; in ops_run_prexor5()
1890 if (test_bit(R5_InJournal, &dev->flags)) { in ops_run_prexor5()
1895 off_srcs[count] = dev->offset; in ops_run_prexor5()
1896 xor_srcs[count++] = dev->orig_page; in ops_run_prexor5()
1897 } else if (test_bit(R5_Wantdrain, &dev->flags)) { in ops_run_prexor5()
1898 off_srcs[count] = dev->offset; in ops_run_prexor5()
1899 xor_srcs[count++] = dev->page; in ops_run_prexor5()
1903 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, in ops_run_prexor5()
1905 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_prexor5()
1906 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor5()
1908 return tx; in ops_run_prexor5()
1913 struct dma_async_tx_descriptor *tx) in ops_run_prexor6() argument
1921 (unsigned long long)sh->sector); in ops_run_prexor6()
1925 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, in ops_run_prexor6()
1927 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_prexor6()
1928 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_prexor6()
1930 return tx; in ops_run_prexor6()
1934 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) in ops_run_biodrain() argument
1936 struct r5conf *conf = sh->raid_conf; in ops_run_biodrain() local
1937 int disks = sh->disks; in ops_run_biodrain()
1942 (unsigned long long)sh->sector); in ops_run_biodrain()
1944 for (i = disks; i--; ) { in ops_run_biodrain()
1949 if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) { in ops_run_biodrain()
1953 dev = &sh->dev[i]; in ops_run_biodrain()
1958 clear_bit(R5_InJournal, &dev->flags); in ops_run_biodrain()
1959 spin_lock_irq(&sh->stripe_lock); in ops_run_biodrain()
1960 chosen = dev->towrite; in ops_run_biodrain()
1961 dev->towrite = NULL; in ops_run_biodrain()
1962 sh->overwrite_disks = 0; in ops_run_biodrain()
1963 BUG_ON(dev->written); in ops_run_biodrain()
1964 wbi = dev->written = chosen; in ops_run_biodrain()
1965 spin_unlock_irq(&sh->stripe_lock); in ops_run_biodrain()
1966 WARN_ON(dev->page != dev->orig_page); in ops_run_biodrain()
1968 while (wbi && wbi->bi_iter.bi_sector < in ops_run_biodrain()
1969 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in ops_run_biodrain()
1970 if (wbi->bi_opf & REQ_FUA) in ops_run_biodrain()
1971 set_bit(R5_WantFUA, &dev->flags); in ops_run_biodrain()
1972 if (wbi->bi_opf & REQ_SYNC) in ops_run_biodrain()
1973 set_bit(R5_SyncIO, &dev->flags); in ops_run_biodrain()
1975 set_bit(R5_Discard, &dev->flags); in ops_run_biodrain()
1977 tx = async_copy_data(1, wbi, &dev->page, in ops_run_biodrain()
1978 dev->offset, in ops_run_biodrain()
1979 dev->sector, tx, sh, in ops_run_biodrain()
1980 r5c_is_writeback(conf->log)); in ops_run_biodrain()
1981 if (dev->page != dev->orig_page && in ops_run_biodrain()
1982 !r5c_is_writeback(conf->log)) { in ops_run_biodrain()
1983 set_bit(R5_SkipCopy, &dev->flags); in ops_run_biodrain()
1984 clear_bit(R5_UPTODATE, &dev->flags); in ops_run_biodrain()
1985 clear_bit(R5_OVERWRITE, &dev->flags); in ops_run_biodrain()
1988 wbi = r5_next_bio(conf, wbi, dev->sector); in ops_run_biodrain()
1991 if (head_sh->batch_head) { in ops_run_biodrain()
1992 sh = list_first_entry(&sh->batch_list, in ops_run_biodrain()
2002 return tx; in ops_run_biodrain()
2008 int disks = sh->disks; in ops_complete_reconstruct()
2009 int pd_idx = sh->pd_idx; in ops_complete_reconstruct()
2010 int qd_idx = sh->qd_idx; in ops_complete_reconstruct()
2015 (unsigned long long)sh->sector); in ops_complete_reconstruct()
2017 for (i = disks; i--; ) { in ops_complete_reconstruct()
2018 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); in ops_complete_reconstruct()
2019 sync |= test_bit(R5_SyncIO, &sh->dev[i].flags); in ops_complete_reconstruct()
2020 discard |= test_bit(R5_Discard, &sh->dev[i].flags); in ops_complete_reconstruct()
2023 for (i = disks; i--; ) { in ops_complete_reconstruct()
2024 struct r5dev *dev = &sh->dev[i]; in ops_complete_reconstruct()
2026 if (dev->written || i == pd_idx || i == qd_idx) { in ops_complete_reconstruct()
2027 if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) { in ops_complete_reconstruct()
2028 set_bit(R5_UPTODATE, &dev->flags); in ops_complete_reconstruct()
2029 if (test_bit(STRIPE_EXPAND_READY, &sh->state)) in ops_complete_reconstruct()
2030 set_bit(R5_Expanded, &dev->flags); in ops_complete_reconstruct()
2033 set_bit(R5_WantFUA, &dev->flags); in ops_complete_reconstruct()
2035 set_bit(R5_SyncIO, &dev->flags); in ops_complete_reconstruct()
2039 if (sh->reconstruct_state == reconstruct_state_drain_run) in ops_complete_reconstruct()
2040 sh->reconstruct_state = reconstruct_state_drain_result; in ops_complete_reconstruct()
2041 else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) in ops_complete_reconstruct()
2042 sh->reconstruct_state = reconstruct_state_prexor_drain_result; in ops_complete_reconstruct()
2044 BUG_ON(sh->reconstruct_state != reconstruct_state_run); in ops_complete_reconstruct()
2045 sh->reconstruct_state = reconstruct_state_result; in ops_complete_reconstruct()
2048 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_reconstruct()
2054 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct5() argument
2056 int disks = sh->disks; in ops_run_reconstruct5()
2060 int count, pd_idx = sh->pd_idx, i; in ops_run_reconstruct5()
2070 (unsigned long long)sh->sector); in ops_run_reconstruct5()
2072 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct5()
2075 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct5()
2078 if (i >= sh->disks) { in ops_run_reconstruct5()
2079 atomic_inc(&sh->count); in ops_run_reconstruct5()
2080 set_bit(R5_Discard, &sh->dev[pd_idx].flags); in ops_run_reconstruct5()
2089 * that are part of a read-modify-write (written) in ops_run_reconstruct5()
2091 if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct5()
2093 off_dest = off_srcs[count] = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2094 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2095 for (i = disks; i--; ) { in ops_run_reconstruct5()
2096 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2097 if (head_sh->dev[i].written || in ops_run_reconstruct5()
2098 test_bit(R5_InJournal, &head_sh->dev[i].flags)) { in ops_run_reconstruct5()
2099 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2100 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2104 xor_dest = sh->dev[pd_idx].page; in ops_run_reconstruct5()
2105 off_dest = sh->dev[pd_idx].offset; in ops_run_reconstruct5()
2106 for (i = disks; i--; ) { in ops_run_reconstruct5()
2107 struct r5dev *dev = &sh->dev[i]; in ops_run_reconstruct5()
2109 off_srcs[count] = dev->offset; in ops_run_reconstruct5()
2110 xor_srcs[count++] = dev->page; in ops_run_reconstruct5()
2120 last_stripe = !head_sh->batch_head || in ops_run_reconstruct5()
2121 list_first_entry(&sh->batch_list, in ops_run_reconstruct5()
2127 atomic_inc(&head_sh->count); in ops_run_reconstruct5()
2128 init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh, in ops_run_reconstruct5()
2132 init_async_submit(&submit, flags, tx, NULL, NULL, in ops_run_reconstruct5()
2137 tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], in ops_run_reconstruct5()
2138 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2140 tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_reconstruct5()
2141 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct5()
2144 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct5()
2152 struct dma_async_tx_descriptor *tx) in ops_run_reconstruct6() argument
2163 pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); in ops_run_reconstruct6()
2165 for (i = 0; i < sh->disks; i++) { in ops_run_reconstruct6()
2166 if (sh->pd_idx == i || sh->qd_idx == i) in ops_run_reconstruct6()
2168 if (!test_bit(R5_Discard, &sh->dev[i].flags)) in ops_run_reconstruct6()
2171 if (i >= sh->disks) { in ops_run_reconstruct6()
2172 atomic_inc(&sh->count); in ops_run_reconstruct6()
2173 set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in ops_run_reconstruct6()
2174 set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in ops_run_reconstruct6()
2183 if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { in ops_run_reconstruct6()
2192 last_stripe = !head_sh->batch_head || in ops_run_reconstruct6()
2193 list_first_entry(&sh->batch_list, in ops_run_reconstruct6()
2197 atomic_inc(&head_sh->count); in ops_run_reconstruct6()
2198 init_async_submit(&submit, txflags, tx, ops_complete_reconstruct, in ops_run_reconstruct6()
2201 init_async_submit(&submit, 0, tx, NULL, NULL, in ops_run_reconstruct6()
2203 tx = async_gen_syndrome(blocks, offs, count+2, in ops_run_reconstruct6()
2204 RAID5_STRIPE_SIZE(sh->raid_conf), &submit); in ops_run_reconstruct6()
2207 sh = list_first_entry(&sh->batch_list, struct stripe_head, in ops_run_reconstruct6()
2218 (unsigned long long)sh->sector); in ops_complete_check()
2220 sh->check_state = check_state_check_result; in ops_complete_check()
2221 set_bit(STRIPE_HANDLE, &sh->state); in ops_complete_check()
2227 int disks = sh->disks; in ops_run_check_p()
2228 int pd_idx = sh->pd_idx; in ops_run_check_p()
2229 int qd_idx = sh->qd_idx; in ops_run_check_p()
2234 struct dma_async_tx_descriptor *tx; in ops_run_check_p() local
2240 (unsigned long long)sh->sector); in ops_run_check_p()
2242 BUG_ON(sh->batch_head); in ops_run_check_p()
2244 xor_dest = sh->dev[pd_idx].page; in ops_run_check_p()
2245 off_dest = sh->dev[pd_idx].offset; in ops_run_check_p()
2248 for (i = disks; i--; ) { in ops_run_check_p()
2251 off_srcs[count] = sh->dev[i].offset; in ops_run_check_p()
2252 xor_srcs[count++] = sh->dev[i].page; in ops_run_check_p()
2257 tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, in ops_run_check_p()
2258 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_p()
2259 &sh->ops.zero_sum_result, &submit); in ops_run_check_p()
2261 atomic_inc(&sh->count); in ops_run_check_p()
2262 init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); in ops_run_check_p()
2263 tx = async_trigger_callback(&submit); in ops_run_check_p()
2274 (unsigned long long)sh->sector, checkp); in ops_run_check_pq()
2276 BUG_ON(sh->batch_head); in ops_run_check_pq()
2281 atomic_inc(&sh->count); in ops_run_check_pq()
2285 RAID5_STRIPE_SIZE(sh->raid_conf), in ops_run_check_pq()
2286 &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); in ops_run_check_pq()
2291 int overlap_clear = 0, i, disks = sh->disks; in raid_run_ops()
2292 struct dma_async_tx_descriptor *tx = NULL; in raid_run_ops() local
2293 struct r5conf *conf = sh->raid_conf; in raid_run_ops() local
2294 int level = conf->level; in raid_run_ops()
2297 local_lock(&conf->percpu->lock); in raid_run_ops()
2298 percpu = this_cpu_ptr(conf->percpu); in raid_run_ops()
2306 tx = ops_run_compute5(sh, percpu); in raid_run_ops()
2308 if (sh->ops.target2 < 0 || sh->ops.target < 0) in raid_run_ops()
2309 tx = ops_run_compute6_1(sh, percpu); in raid_run_ops()
2311 tx = ops_run_compute6_2(sh, percpu); in raid_run_ops()
2314 if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) in raid_run_ops()
2315 async_tx_ack(tx); in raid_run_ops()
2320 tx = ops_run_prexor5(sh, percpu, tx); in raid_run_ops()
2322 tx = ops_run_prexor6(sh, percpu, tx); in raid_run_ops()
2326 tx = ops_run_partial_parity(sh, percpu, tx); in raid_run_ops()
2329 tx = ops_run_biodrain(sh, tx); in raid_run_ops()
2335 ops_run_reconstruct5(sh, percpu, tx); in raid_run_ops()
2337 ops_run_reconstruct6(sh, percpu, tx); in raid_run_ops()
2341 if (sh->check_state == check_state_run) in raid_run_ops()
2343 else if (sh->check_state == check_state_run_q) in raid_run_ops()
2345 else if (sh->check_state == check_state_run_pq) in raid_run_ops()
2351 if (overlap_clear && !sh->batch_head) { in raid_run_ops()
2352 for (i = disks; i--; ) { in raid_run_ops()
2353 struct r5dev *dev = &sh->dev[i]; in raid_run_ops()
2354 if (test_and_clear_bit(R5_Overlap, &dev->flags)) in raid_run_ops()
2355 wake_up(&sh->raid_conf->wait_for_overlap); in raid_run_ops()
2358 local_unlock(&conf->percpu->lock); in raid_run_ops()
2364 kfree(sh->pages); in free_stripe()
2366 if (sh->ppl_page) in free_stripe()
2367 __free_page(sh->ppl_page); in free_stripe()
2372 int disks, struct r5conf *conf) in alloc_stripe() argument
2378 spin_lock_init(&sh->stripe_lock); in alloc_stripe()
2379 spin_lock_init(&sh->batch_lock); in alloc_stripe()
2380 INIT_LIST_HEAD(&sh->batch_list); in alloc_stripe()
2381 INIT_LIST_HEAD(&sh->lru); in alloc_stripe()
2382 INIT_LIST_HEAD(&sh->r5c); in alloc_stripe()
2383 INIT_LIST_HEAD(&sh->log_list); in alloc_stripe()
2384 atomic_set(&sh->count, 1); in alloc_stripe()
2385 sh->raid_conf = conf; in alloc_stripe()
2386 sh->log_start = MaxSector; in alloc_stripe()
2388 if (raid5_has_ppl(conf)) { in alloc_stripe()
2389 sh->ppl_page = alloc_page(gfp); in alloc_stripe()
2390 if (!sh->ppl_page) { in alloc_stripe()
2396 if (init_stripe_shared_pages(sh, conf, disks)) { in alloc_stripe()
2404 static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) in grow_one_stripe() argument
2408 sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf); in grow_one_stripe()
2414 free_stripe(conf->slab_cache, sh); in grow_one_stripe()
2417 sh->hash_lock_index = in grow_one_stripe()
2418 conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; in grow_one_stripe()
2420 atomic_inc(&conf->active_stripes); in grow_one_stripe()
2423 conf->max_nr_stripes++; in grow_one_stripe()
2427 static int grow_stripes(struct r5conf *conf, int num) in grow_stripes() argument
2430 size_t namelen = sizeof(conf->cache_name[0]); in grow_stripes()
2431 int devs = max(conf->raid_disks, conf->previous_raid_disks); in grow_stripes()
2433 if (conf->mddev->gendisk) in grow_stripes()
2434 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2435 "raid%d-%s", conf->level, mdname(conf->mddev)); in grow_stripes()
2437 snprintf(conf->cache_name[0], namelen, in grow_stripes()
2438 "raid%d-%p", conf->level, conf->mddev); in grow_stripes()
2439 snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]); in grow_stripes()
2441 conf->active_name = 0; in grow_stripes()
2442 sc = kmem_cache_create(conf->cache_name[conf->active_name], in grow_stripes()
2447 conf->slab_cache = sc; in grow_stripes()
2448 conf->pool_size = devs; in grow_stripes()
2449 while (num--) in grow_stripes()
2450 if (!grow_one_stripe(conf, GFP_KERNEL)) in grow_stripes()
2457 * scribble_alloc - allocate percpu scribble buffer for required size
2488 return -ENOMEM; in scribble_alloc()
2490 kvfree(percpu->scribble); in scribble_alloc()
2492 percpu->scribble = scribble; in scribble_alloc()
2493 percpu->scribble_obj_size = obj_size; in scribble_alloc()
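/*
 * Editor's sketch (not raid5.c code): scribble_alloc() sizes its per-CPU
 * buffer with an overflow-checked count * size multiplication (the
 * kvmalloc_array() pattern in current kernels). A hypothetical userspace
 * analogue of that check:
 */
#include <stdint.h>
#include <stdlib.h>

static void *toy_alloc_array(size_t n, size_t size)
{
	if (size && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return malloc(n * size);
}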
2497 static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) in resize_chunks() argument
2507 if (conf->scribble_disks >= new_disks && in resize_chunks()
2508 conf->scribble_sectors >= new_sectors) in resize_chunks()
2510 mddev_suspend(conf->mddev); in resize_chunks()
2516 percpu = per_cpu_ptr(conf->percpu, cpu); in resize_chunks()
2518 new_sectors / RAID5_STRIPE_SECTORS(conf)); in resize_chunks()
2524 mddev_resume(conf->mddev); in resize_chunks()
2526 conf->scribble_disks = new_disks; in resize_chunks()
2527 conf->scribble_sectors = new_sectors; in resize_chunks()
2532 static int resize_stripes(struct r5conf *conf, int newsize) in resize_stripes() argument
2546 * 3/ reallocate conf->disks to be suitably bigger. If this fails, in resize_stripes()
2547 * we simply return a failure status - no need to clean anything up. in resize_stripes()
2565 md_allow_write(conf->mddev); in resize_stripes()
2568 sc = kmem_cache_create(conf->cache_name[1-conf->active_name], in resize_stripes()
2572 return -ENOMEM; in resize_stripes()
2574 /* Need to ensure auto-resizing doesn't interfere */ in resize_stripes()
2575 mutex_lock(&conf->cache_size_mutex); in resize_stripes()
2577 for (i = conf->max_nr_stripes; i; i--) { in resize_stripes()
2578 nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); in resize_stripes()
2582 list_add(&nsh->lru, &newstripes); in resize_stripes()
2588 list_del(&nsh->lru); in resize_stripes()
2592 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2593 return -ENOMEM; in resize_stripes()
2595 /* Step 2 - Must use GFP_NOIO now. in resize_stripes()
2602 lock_device_hash_lock(conf, hash); in resize_stripes()
2603 wait_event_cmd(conf->wait_for_stripe, in resize_stripes()
2604 !list_empty(conf->inactive_list + hash), in resize_stripes()
2605 unlock_device_hash_lock(conf, hash), in resize_stripes()
2606 lock_device_hash_lock(conf, hash)); in resize_stripes()
2607 osh = get_free_stripe(conf, hash); in resize_stripes()
2608 unlock_device_hash_lock(conf, hash); in resize_stripes()
2611 for (i = 0; i < osh->nr_pages; i++) { in resize_stripes()
2612 nsh->pages[i] = osh->pages[i]; in resize_stripes()
2613 osh->pages[i] = NULL; in resize_stripes()
2616 for (i = 0; i < conf->pool_size; i++) { in resize_stripes()
2617 nsh->dev[i].page = osh->dev[i].page; in resize_stripes()
2618 nsh->dev[i].orig_page = osh->dev[i].page; in resize_stripes()
2619 nsh->dev[i].offset = osh->dev[i].offset; in resize_stripes()
2621 nsh->hash_lock_index = hash; in resize_stripes()
2622 free_stripe(conf->slab_cache, osh); in resize_stripes()
2624 if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + in resize_stripes()
2625 !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { in resize_stripes()
2630 kmem_cache_destroy(conf->slab_cache); in resize_stripes()
2635 * conf->disks and the scribble region in resize_stripes()
2639 for (i = 0; i < conf->pool_size; i++) in resize_stripes()
2640 ndisks[i] = conf->disks[i]; in resize_stripes()
2642 for (i = conf->pool_size; i < newsize; i++) { in resize_stripes()
2645 err = -ENOMEM; in resize_stripes()
2649 for (i = conf->pool_size; i < newsize; i++) in resize_stripes()
2654 kfree(conf->disks); in resize_stripes()
2655 conf->disks = ndisks; in resize_stripes()
2658 err = -ENOMEM; in resize_stripes()
2660 conf->slab_cache = sc; in resize_stripes()
2661 conf->active_name = 1-conf->active_name; in resize_stripes()
2666 list_del_init(&nsh->lru); in resize_stripes()
2669 for (i = 0; i < nsh->nr_pages; i++) { in resize_stripes()
2670 if (nsh->pages[i]) in resize_stripes()
2672 nsh->pages[i] = alloc_page(GFP_NOIO); in resize_stripes()
2673 if (!nsh->pages[i]) in resize_stripes()
2674 err = -ENOMEM; in resize_stripes()
2677 for (i = conf->raid_disks; i < newsize; i++) { in resize_stripes()
2678 if (nsh->dev[i].page) in resize_stripes()
2680 nsh->dev[i].page = raid5_get_dev_page(nsh, i); in resize_stripes()
2681 nsh->dev[i].orig_page = nsh->dev[i].page; in resize_stripes()
2682 nsh->dev[i].offset = raid5_get_page_offset(nsh, i); in resize_stripes()
2685 for (i = conf->raid_disks; i < newsize; i++) in resize_stripes()
2686 if (nsh->dev[i].page == NULL) { in resize_stripes()
2688 nsh->dev[i].page = p; in resize_stripes()
2689 nsh->dev[i].orig_page = p; in resize_stripes()
2690 nsh->dev[i].offset = 0; in resize_stripes()
2692 err = -ENOMEM; in resize_stripes()
2700 conf->pool_size = newsize; in resize_stripes()
2701 mutex_unlock(&conf->cache_size_mutex); in resize_stripes()
2706 static int drop_one_stripe(struct r5conf *conf) in drop_one_stripe() argument
2709 int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK; in drop_one_stripe()
2711 spin_lock_irq(conf->hash_locks + hash); in drop_one_stripe()
2712 sh = get_free_stripe(conf, hash); in drop_one_stripe()
2713 spin_unlock_irq(conf->hash_locks + hash); in drop_one_stripe()
2716 BUG_ON(atomic_read(&sh->count)); in drop_one_stripe()
2718 free_stripe(conf->slab_cache, sh); in drop_one_stripe()
2719 atomic_dec(&conf->active_stripes); in drop_one_stripe()
2720 conf->max_nr_stripes--; in drop_one_stripe()
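/*
 * Editor's note: grow_one_stripe() files each new stripe under bucket
 * count % NR_STRIPE_HASH_LOCKS, while drop_one_stripe() takes one from
 * bucket (count - 1) & STRIPE_HASH_LOCKS_MASK; the two agree because the
 * lock count is a power of two, so x & (n - 1) == x % n. Hypothetical
 * userspace check of that identity:
 */
#include <assert.h>

static void toy_bucket_check(unsigned int idx, unsigned int nbuckets_pow2)
{
	assert((idx & (nbuckets_pow2 - 1)) == (idx % nbuckets_pow2));
}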
2724 static void shrink_stripes(struct r5conf *conf) in shrink_stripes() argument
2726 while (conf->max_nr_stripes && in shrink_stripes()
2727 drop_one_stripe(conf)) in shrink_stripes()
2730 kmem_cache_destroy(conf->slab_cache); in shrink_stripes()
2731 conf->slab_cache = NULL; in shrink_stripes()
2741 atomic_read(&rcu_access_pointer(rdev)->nr_pending)); in rdev_pend_deref()
2753 lockdep_is_held(&mddev->reconfig_mutex)); in rdev_mdlock_deref()
2758 struct stripe_head *sh = bi->bi_private; in raid5_end_read_request()
2759 struct r5conf *conf = sh->raid_conf; in raid5_end_read_request() local
2760 int disks = sh->disks, i; in raid5_end_read_request()
2765 if (bi == &sh->dev[i].req) in raid5_end_read_request()
2769 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_read_request()
2770 bi->bi_status); in raid5_end_read_request()
2775 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2781 rdev = rdev_pend_deref(conf->disks[i].replacement); in raid5_end_read_request()
2783 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_read_request()
2785 if (use_new_offset(conf, sh)) in raid5_end_read_request()
2786 s = sh->sector + rdev->new_data_offset; in raid5_end_read_request()
2788 s = sh->sector + rdev->data_offset; in raid5_end_read_request()
2789 if (!bi->bi_status) { in raid5_end_read_request()
2790 set_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2791 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in raid5_end_read_request()
2798 mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf), in raid5_end_read_request()
2800 rdev->bdev); in raid5_end_read_request()
2801 atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors); in raid5_end_read_request()
2802 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2803 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2804 } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2805 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2807 if (test_bit(R5_InJournal, &sh->dev[i].flags)) in raid5_end_read_request()
2812 set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags); in raid5_end_read_request()
2814 if (atomic_read(&rdev->read_errors)) in raid5_end_read_request()
2815 atomic_set(&rdev->read_errors, 0); in raid5_end_read_request()
2820 clear_bit(R5_UPTODATE, &sh->dev[i].flags); in raid5_end_read_request()
2821 if (bi->bi_status != BLK_STS_PROTECTION) in raid5_end_read_request()
2822 atomic_inc(&rdev->read_errors); in raid5_end_read_request()
2823 if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) in raid5_end_read_request()
2826 mdname(conf->mddev), in raid5_end_read_request()
2828 rdev->bdev); in raid5_end_read_request()
2829 else if (conf->mddev->degraded >= conf->max_degraded) { in raid5_end_read_request()
2833 mdname(conf->mddev), in raid5_end_read_request()
2835 rdev->bdev); in raid5_end_read_request()
2836 } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { in raid5_end_read_request()
2841 mdname(conf->mddev), in raid5_end_read_request()
2843 rdev->bdev); in raid5_end_read_request()
2844 } else if (atomic_read(&rdev->read_errors) in raid5_end_read_request()
2845 > conf->max_nr_stripes) { in raid5_end_read_request()
2846 if (!test_bit(Faulty, &rdev->flags)) { in raid5_end_read_request()
2848 mdname(conf->mddev), in raid5_end_read_request()
2849 atomic_read(&rdev->read_errors), in raid5_end_read_request()
2850 conf->max_nr_stripes); in raid5_end_read_request()
2852 mdname(conf->mddev), rdev->bdev); in raid5_end_read_request()
2856 if (set_bad && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2857 && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) in raid5_end_read_request()
2860 if (sh->qd_idx >= 0 && sh->pd_idx == i) in raid5_end_read_request()
2861 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2862 else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { in raid5_end_read_request()
2863 set_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2864 clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2866 set_bit(R5_ReadNoMerge, &sh->dev[i].flags); in raid5_end_read_request()
2868 clear_bit(R5_ReadError, &sh->dev[i].flags); in raid5_end_read_request()
2869 clear_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_read_request()
2871 && test_bit(In_sync, &rdev->flags) in raid5_end_read_request()
2873 rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0))) in raid5_end_read_request()
2874 md_error(conf->mddev, rdev); in raid5_end_read_request()
2877 rdev_dec_pending(rdev, conf->mddev); in raid5_end_read_request()
2879 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_read_request()
2880 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_read_request()
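/*
 * Editor's sketch (not raid5.c code): a compressed model of the read-error
 * policy above. A first failure is retried by rewriting the block and
 * re-reading it; a device whose accumulated read errors exceed the
 * threshold (here, the stripe-cache size) is failed outright. All names
 * are hypothetical.
 */
enum toy_verdict { TOY_TRY_REWRITE, TOY_RETRY_NOMERGE, TOY_KICK_DEVICE };

static enum toy_verdict toy_on_read_error(int read_errors, int threshold,
					  int rewrite_already_tried)
{
	if (read_errors > threshold)
		return TOY_KICK_DEVICE;
	return rewrite_already_tried ? TOY_RETRY_NOMERGE : TOY_TRY_REWRITE;
}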
2886 struct stripe_head *sh = bi->bi_private; in raid5_end_write_request()
2887 struct r5conf *conf = sh->raid_conf; in raid5_end_write_request() local
2888 int disks = sh->disks, i; in raid5_end_write_request()
2895 if (bi == &sh->dev[i].req) { in raid5_end_write_request()
2896 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_write_request()
2899 if (bi == &sh->dev[i].rreq) { in raid5_end_write_request()
2900 rdev = rdev_pend_deref(conf->disks[i].replacement); in raid5_end_write_request()
2908 rdev = rdev_pend_deref(conf->disks[i].rdev); in raid5_end_write_request()
2913 (unsigned long long)sh->sector, i, atomic_read(&sh->count), in raid5_end_write_request()
2914 bi->bi_status); in raid5_end_write_request()
2921 if (bi->bi_status) in raid5_end_write_request()
2922 md_error(conf->mddev, rdev); in raid5_end_write_request()
2923 else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2924 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2926 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); in raid5_end_write_request()
2928 if (bi->bi_status) { in raid5_end_write_request()
2929 set_bit(STRIPE_DEGRADED, &sh->state); in raid5_end_write_request()
2930 set_bit(WriteErrorSeen, &rdev->flags); in raid5_end_write_request()
2931 set_bit(R5_WriteError, &sh->dev[i].flags); in raid5_end_write_request()
2932 if (!test_and_set_bit(WantReplacement, &rdev->flags)) in raid5_end_write_request()
2934 &rdev->mddev->recovery); in raid5_end_write_request()
2935 } else if (is_badblock(rdev, sh->sector, in raid5_end_write_request()
2936 RAID5_STRIPE_SECTORS(conf), in raid5_end_write_request()
2938 set_bit(R5_MadeGood, &sh->dev[i].flags); in raid5_end_write_request()
2939 if (test_bit(R5_ReadError, &sh->dev[i].flags)) in raid5_end_write_request()
2942 * a re-write. in raid5_end_write_request()
2944 set_bit(R5_ReWrite, &sh->dev[i].flags); in raid5_end_write_request()
2947 rdev_dec_pending(rdev, conf->mddev); in raid5_end_write_request()
2949 if (sh->batch_head && bi->bi_status && !replacement) in raid5_end_write_request()
2950 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); in raid5_end_write_request()
2953 if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) in raid5_end_write_request()
2954 clear_bit(R5_LOCKED, &sh->dev[i].flags); in raid5_end_write_request()
2955 set_bit(STRIPE_HANDLE, &sh->state); in raid5_end_write_request()
2957 if (sh->batch_head && sh != sh->batch_head) in raid5_end_write_request()
2958 raid5_release_stripe(sh->batch_head); in raid5_end_write_request()
2964 struct r5conf *conf = mddev->private; in raid5_error() local
2969 mdname(mddev), rdev->bdev); in raid5_error()
2971 spin_lock_irqsave(&conf->device_lock, flags); in raid5_error()
2972 set_bit(Faulty, &rdev->flags); in raid5_error()
2973 clear_bit(In_sync, &rdev->flags); in raid5_error()
2974 mddev->degraded = raid5_calc_degraded(conf); in raid5_error()
2976 if (has_failed(conf)) { in raid5_error()
2977 set_bit(MD_BROKEN, &conf->mddev->flags); in raid5_error()
2978 conf->recovery_disabled = mddev->recovery_disabled; in raid5_error()
2981 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_error()
2984 mdname(mddev), conf->raid_disks - mddev->degraded); in raid5_error()
2987 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_error()
2988 set_bit(MD_RECOVERY_INTR, &mddev->recovery); in raid5_error()
2990 set_bit(Blocked, &rdev->flags); in raid5_error()
2991 set_mask_bits(&mddev->sb_flags, 0, in raid5_error()
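/*
 * Editor's sketch (not raid5.c code): the health test behind has_failed()
 * above - an array is broken once more devices are out of sync than the
 * parity scheme tolerates (max_degraded is 1 for RAID-5, 2 for RAID-6).
 */
static int toy_calc_degraded(const int in_sync[], int raid_disks)
{
	int i, degraded = 0;

	for (i = 0; i < raid_disks; i++)
		if (!in_sync[i])
			degraded++;
	return degraded;
}

static int toy_array_broken(int degraded, int max_degraded)
{
	return degraded > max_degraded;
}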
3000 sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, in raid5_compute_sector() argument
3010 int algorithm = previous ? conf->prev_algo in raid5_compute_sector()
3011 : conf->algorithm; in raid5_compute_sector()
3012 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_sector()
3013 : conf->chunk_sectors; in raid5_compute_sector()
3014 int raid_disks = previous ? conf->previous_raid_disks in raid5_compute_sector()
3015 : conf->raid_disks; in raid5_compute_sector()
3016 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_sector()
3035 pd_idx = qd_idx = -1; in raid5_compute_sector()
3036 switch(conf->level) { in raid5_compute_sector()
3043 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3053 pd_idx = data_disks - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3075 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3077 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3086 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3093 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3119 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3133 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3135 if (pd_idx == raid_disks-1) { in raid5_compute_sector()
3145 pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); in raid5_compute_sector()
3146 qd_idx = (pd_idx + raid_disks - 1) % raid_disks; in raid5_compute_sector()
3153 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3156 qd_idx = raid_disks - 1; in raid5_compute_sector()
3160 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3163 qd_idx = raid_disks - 1; in raid5_compute_sector()
3167 pd_idx = data_disks - sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3168 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3169 qd_idx = raid_disks - 1; in raid5_compute_sector()
3173 pd_idx = sector_div(stripe2, raid_disks-1); in raid5_compute_sector()
3174 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); in raid5_compute_sector()
3175 qd_idx = raid_disks - 1; in raid5_compute_sector()
3181 qd_idx = raid_disks - 1; in raid5_compute_sector()
3191 sh->pd_idx = pd_idx; in raid5_compute_sector()
3192 sh->qd_idx = qd_idx; in raid5_compute_sector()
3193 sh->ddf_layout = ddf_layout; in raid5_compute_sector()
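/*
 * Editor's sketch (not raid5.c code): the arithmetic above for the common
 * RAID-5 left-symmetric layout, stripped of the sector_div() plumbing.
 * Hypothetical userspace names.
 */
#include <stdint.h>

static void toy_map_left_symmetric(uint64_t r_sector, int raid_disks,
				   int sectors_per_chunk,
				   uint64_t *stripe, int *dd_idx, int *pd_idx)
{
	int data_disks = raid_disks - 1;
	uint64_t chunk = r_sector / sectors_per_chunk;

	*dd_idx = (int)(chunk % data_disks);	/* index among data chunks */
	*stripe = chunk / data_disks;
	/* parity starts on the last disk and rotates left by one per stripe */
	*pd_idx = data_disks - (int)(*stripe % raid_disks);
	/* data chunks follow the parity disk, wrapping around */
	*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
}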
3204 struct r5conf *conf = sh->raid_conf; in raid5_compute_blocknr() local
3205 int raid_disks = sh->disks; in raid5_compute_blocknr()
3206 int data_disks = raid_disks - conf->max_degraded; in raid5_compute_blocknr()
3207 sector_t new_sector = sh->sector, check; in raid5_compute_blocknr()
3208 int sectors_per_chunk = previous ? conf->prev_chunk_sectors in raid5_compute_blocknr()
3209 : conf->chunk_sectors; in raid5_compute_blocknr()
3210 int algorithm = previous ? conf->prev_algo in raid5_compute_blocknr()
3211 : conf->algorithm; in raid5_compute_blocknr()
3222 if (i == sh->pd_idx) in raid5_compute_blocknr()
3224 switch(conf->level) { in raid5_compute_blocknr()
3230 if (i > sh->pd_idx) in raid5_compute_blocknr()
3231 i--; in raid5_compute_blocknr()
3235 if (i < sh->pd_idx) in raid5_compute_blocknr()
3237 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3240 i -= 1; in raid5_compute_blocknr()
3249 if (i == sh->qd_idx) in raid5_compute_blocknr()
3256 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3257 i--; /* Q D D D P */ in raid5_compute_blocknr()
3258 else if (i > sh->pd_idx) in raid5_compute_blocknr()
3259 i -= 2; /* D D P Q D */ in raid5_compute_blocknr()
3263 if (sh->pd_idx == raid_disks-1) in raid5_compute_blocknr()
3264 i--; /* Q D D D P */ in raid5_compute_blocknr()
3267 if (i < sh->pd_idx) in raid5_compute_blocknr()
3269 i -= (sh->pd_idx + 2); in raid5_compute_blocknr()
3273 i -= 2; in raid5_compute_blocknr()
3279 if (sh->pd_idx == 0) in raid5_compute_blocknr()
3280 i--; /* P D D D Q */ in raid5_compute_blocknr()
3283 if (i < sh->pd_idx) in raid5_compute_blocknr()
3285 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3290 if (i > sh->pd_idx) in raid5_compute_blocknr()
3291 i--; in raid5_compute_blocknr()
3295 if (i < sh->pd_idx) in raid5_compute_blocknr()
3297 i -= (sh->pd_idx + 1); in raid5_compute_blocknr()
3300 i -= 1; in raid5_compute_blocknr()
3311 check = raid5_compute_sector(conf, r_sector, in raid5_compute_blocknr()
3313 if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx in raid5_compute_blocknr()
3314 || sh2.qd_idx != sh->qd_idx) { in raid5_compute_blocknr()
3316 mdname(conf->mddev)); in raid5_compute_blocknr()
3329 * 1. degraded stripe has a non-overwrite to the missing dev, AND this
3332 * In this case, when reading data for the non-overwrite dev, it is
3340 * It is important to be able to flush all stripes in raid5-cache.
3343 * stripe, we need to reserve (conf->raid_disks + 1) pages per stripe
3345 * operation, we only need (conf->max_degraded + 1) pages per stripe.
3356 * based on data in stripe cache. The array is read-only to upper
3360 static inline bool delay_towrite(struct r5conf *conf, in delay_towrite() argument
3365 if (!test_bit(R5_OVERWRITE, &dev->flags) && in delay_towrite()
3366 !test_bit(R5_Insync, &dev->flags) && s->injournal) in delay_towrite()
3369 if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && in delay_towrite()
3370 s->injournal > 0) in delay_towrite()
3373 if (s->log_failed && s->injournal) in delay_towrite()
3382 int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks; in schedule_reconstruction()
3383 struct r5conf *conf = sh->raid_conf; in schedule_reconstruction() local
3384 int level = conf->level; in schedule_reconstruction()
3395 for (i = disks; i--; ) { in schedule_reconstruction()
3396 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3398 if (dev->towrite && !delay_towrite(conf, dev, s)) { in schedule_reconstruction()
3399 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3400 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3402 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3403 s->locked++; in schedule_reconstruction()
3404 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3405 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3406 s->locked++; in schedule_reconstruction()
3414 if (!s->locked) in schedule_reconstruction()
3417 sh->reconstruct_state = reconstruct_state_drain_run; in schedule_reconstruction()
3418 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3420 sh->reconstruct_state = reconstruct_state_run; in schedule_reconstruction()
3422 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3424 if (s->locked + conf->max_degraded == disks) in schedule_reconstruction()
3425 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) in schedule_reconstruction()
3426 atomic_inc(&conf->pending_full_writes); in schedule_reconstruction()
3428 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || in schedule_reconstruction()
3429 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); in schedule_reconstruction()
3431 (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) || in schedule_reconstruction()
3432 test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags)))); in schedule_reconstruction()
3434 for (i = disks; i--; ) { in schedule_reconstruction()
3435 struct r5dev *dev = &sh->dev[i]; in schedule_reconstruction()
3439 if (dev->towrite && in schedule_reconstruction()
3440 (test_bit(R5_UPTODATE, &dev->flags) || in schedule_reconstruction()
3441 test_bit(R5_Wantcompute, &dev->flags))) { in schedule_reconstruction()
3442 set_bit(R5_Wantdrain, &dev->flags); in schedule_reconstruction()
3443 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3444 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3445 s->locked++; in schedule_reconstruction()
3446 } else if (test_bit(R5_InJournal, &dev->flags)) { in schedule_reconstruction()
3447 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3448 s->locked++; in schedule_reconstruction()
3451 if (!s->locked) in schedule_reconstruction()
3452 /* False alarm - nothing to do */ in schedule_reconstruction()
3454 sh->reconstruct_state = reconstruct_state_prexor_drain_run; in schedule_reconstruction()
3455 set_bit(STRIPE_OP_PREXOR, &s->ops_request); in schedule_reconstruction()
3456 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); in schedule_reconstruction()
3457 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); in schedule_reconstruction()
3463 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3464 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in schedule_reconstruction()
3465 s->locked++; in schedule_reconstruction()
3468 int qd_idx = sh->qd_idx; in schedule_reconstruction()
3469 struct r5dev *dev = &sh->dev[qd_idx]; in schedule_reconstruction()
3471 set_bit(R5_LOCKED, &dev->flags); in schedule_reconstruction()
3472 clear_bit(R5_UPTODATE, &dev->flags); in schedule_reconstruction()
3473 s->locked++; in schedule_reconstruction()
3476 if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page && in schedule_reconstruction()
3477 test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && in schedule_reconstruction()
3478 !test_bit(STRIPE_FULL_WRITE, &sh->state) && in schedule_reconstruction()
3479 test_bit(R5_Insync, &sh->dev[pd_idx].flags)) in schedule_reconstruction()
3480 set_bit(STRIPE_OP_PARTIAL_PARITY, &s->ops_request); in schedule_reconstruction()
3483 __func__, (unsigned long long)sh->sector, in schedule_reconstruction()
3484 s->locked, s->ops_request); in schedule_reconstruction()
3490 struct r5conf *conf = sh->raid_conf; in stripe_bio_overlaps() local
3494 bi->bi_iter.bi_sector, sh->sector); in stripe_bio_overlaps()
3497 if (sh->batch_head) in stripe_bio_overlaps()
3501 bip = &sh->dev[dd_idx].towrite; in stripe_bio_overlaps()
3503 bip = &sh->dev[dd_idx].toread; in stripe_bio_overlaps()
3505 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) { in stripe_bio_overlaps()
3506 if (bio_end_sector(*bip) > bi->bi_iter.bi_sector) in stripe_bio_overlaps()
3508 bip = &(*bip)->bi_next; in stripe_bio_overlaps()
3511 if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi)) in stripe_bio_overlaps()
3514 if (forwrite && raid5_has_ppl(conf)) { in stripe_bio_overlaps()
3528 for (i = 0; i < sh->disks; i++) { in stripe_bio_overlaps()
3529 if (i != sh->pd_idx && in stripe_bio_overlaps()
3530 (i == dd_idx || sh->dev[i].towrite)) { in stripe_bio_overlaps()
3531 sector = sh->dev[i].sector; in stripe_bio_overlaps()
3540 if (first + conf->chunk_sectors * (count - 1) != last) in stripe_bio_overlaps()
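/*
 * Editor's sketch (not raid5.c code): the walk above exploits the fact
 * that each dev's bio list is kept sorted by start sector, so conflict
 * detection reduces to the classic half-open interval test. Hypothetical
 * userspace form:
 */
#include <stdint.h>

/* [s1, e1) and [s2, e2) overlap iff each range starts before the other ends */
static int toy_overlaps(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
{
	return s1 < e2 && s2 < e1;
}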
3550 struct r5conf *conf = sh->raid_conf; in __add_stripe_bio() local
3555 bip = &sh->dev[dd_idx].towrite; in __add_stripe_bio()
3559 bip = &sh->dev[dd_idx].toread; in __add_stripe_bio()
3562 while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) in __add_stripe_bio()
3563 bip = &(*bip)->bi_next; in __add_stripe_bio()
3566 clear_bit(STRIPE_BATCH_READY, &sh->state); in __add_stripe_bio()
3568 BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); in __add_stripe_bio()
3570 bi->bi_next = *bip; in __add_stripe_bio()
3573 md_write_inc(conf->mddev, bi); in __add_stripe_bio()
3577 sector_t sector = sh->dev[dd_idx].sector; in __add_stripe_bio()
3578 for (bi=sh->dev[dd_idx].towrite; in __add_stripe_bio()
3579 sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) && in __add_stripe_bio()
3580 bi && bi->bi_iter.bi_sector <= sector; in __add_stripe_bio()
3581 bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) { in __add_stripe_bio()
3585 if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf)) in __add_stripe_bio()
3586 if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags)) in __add_stripe_bio()
3587 sh->overwrite_disks++; in __add_stripe_bio()
3591 (*bip)->bi_iter.bi_sector, sh->sector, dd_idx, in __add_stripe_bio()
3592 sh->dev[dd_idx].sector); in __add_stripe_bio()
3594 if (conf->mddev->bitmap && firstwrite) { in __add_stripe_bio()
3607 set_bit(STRIPE_BITMAP_PENDING, &sh->state); in __add_stripe_bio()
3608 spin_unlock_irq(&sh->stripe_lock); in __add_stripe_bio()
3609 md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, in __add_stripe_bio()
3610 RAID5_STRIPE_SECTORS(conf), 0); in __add_stripe_bio()
3611 spin_lock_irq(&sh->stripe_lock); in __add_stripe_bio()
3612 clear_bit(STRIPE_BITMAP_PENDING, &sh->state); in __add_stripe_bio()
3613 if (!sh->batch_head) { in __add_stripe_bio()
3614 sh->bm_seq = conf->seq_flush+1; in __add_stripe_bio()
3615 set_bit(STRIPE_BIT_DELAY, &sh->state); in __add_stripe_bio()
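/*
 * Editor's sketch (not raid5.c code): the R5_OVERWRITE scan above, as a
 * hypothetical userspace routine - advance a cursor through the sorted
 * write list; the device block counts as fully overwritten if the cursor
 * reaches its end.
 */
#include <stdint.h>

struct toy_bio {
	uint64_t start, end;		/* half-open [start, end) */
	struct toy_bio *next;		/* list sorted by start */
};

static int toy_fully_covered(uint64_t dev_start, uint64_t dev_end,
			     const struct toy_bio *b)
{
	uint64_t cursor = dev_start;

	for (; b && b->start <= cursor && cursor < dev_end; b = b->next)
		if (b->end > cursor)
			cursor = b->end;
	return cursor >= dev_end;
}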
3628 spin_lock_irq(&sh->stripe_lock); in add_stripe_bio()
3631 set_bit(R5_Overlap, &sh->dev[dd_idx].flags); in add_stripe_bio()
3632 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3637 spin_unlock_irq(&sh->stripe_lock); in add_stripe_bio()
3641 static void end_reshape(struct r5conf *conf);
3643 static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, in stripe_set_idx() argument
3647 previous ? conf->prev_chunk_sectors : conf->chunk_sectors; in stripe_set_idx()
3650 int disks = previous ? conf->previous_raid_disks : conf->raid_disks; in stripe_set_idx()
3652 raid5_compute_sector(conf, in stripe_set_idx()
3653 stripe * (disks - conf->max_degraded) in stripe_set_idx()
3660 handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, in handle_failed_stripe() argument
3664 BUG_ON(sh->batch_head); in handle_failed_stripe()
3665 for (i = disks; i--; ) { in handle_failed_stripe()
3669 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { in handle_failed_stripe()
3672 rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_stripe()
3673 if (rdev && test_bit(In_sync, &rdev->flags) && in handle_failed_stripe()
3674 !test_bit(Faulty, &rdev->flags)) in handle_failed_stripe()
3675 atomic_inc(&rdev->nr_pending); in handle_failed_stripe()
3682 sh->sector, in handle_failed_stripe()
3683 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_stripe()
3684 md_error(conf->mddev, rdev); in handle_failed_stripe()
3685 rdev_dec_pending(rdev, conf->mddev); in handle_failed_stripe()
3688 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3690 bi = sh->dev[i].towrite; in handle_failed_stripe()
3691 sh->dev[i].towrite = NULL; in handle_failed_stripe()
3692 sh->overwrite_disks = 0; in handle_failed_stripe()
3693 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3699 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3700 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3702 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3703 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3704 struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3706 md_write_end(conf->mddev); in handle_failed_stripe()
3711 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3712 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3715 bi = sh->dev[i].written; in handle_failed_stripe()
3716 sh->dev[i].written = NULL; in handle_failed_stripe()
3717 if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { in handle_failed_stripe()
3718 WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_failed_stripe()
3719 sh->dev[i].page = sh->dev[i].orig_page; in handle_failed_stripe()
3723 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3724 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3725 struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3727 md_write_end(conf->mddev); in handle_failed_stripe()
3732 /* fail any reads if this device is non-operational and in handle_failed_stripe()
3735 if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && in handle_failed_stripe()
3736 s->failed > conf->max_degraded && in handle_failed_stripe()
3737 (!test_bit(R5_Insync, &sh->dev[i].flags) || in handle_failed_stripe()
3738 test_bit(R5_ReadError, &sh->dev[i].flags))) { in handle_failed_stripe()
3739 spin_lock_irq(&sh->stripe_lock); in handle_failed_stripe()
3740 bi = sh->dev[i].toread; in handle_failed_stripe()
3741 sh->dev[i].toread = NULL; in handle_failed_stripe()
3742 spin_unlock_irq(&sh->stripe_lock); in handle_failed_stripe()
3743 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in handle_failed_stripe()
3744 wake_up(&conf->wait_for_overlap); in handle_failed_stripe()
3746 s->to_read--; in handle_failed_stripe()
3747 while (bi && bi->bi_iter.bi_sector < in handle_failed_stripe()
3748 sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { in handle_failed_stripe()
3750 r5_next_bio(conf, bi, sh->dev[i].sector); in handle_failed_stripe()
3757 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_failed_stripe()
3758 RAID5_STRIPE_SECTORS(conf), 0, 0); in handle_failed_stripe()
3760 * still be locked - so just clear all R5_LOCKED flags in handle_failed_stripe()
3762 clear_bit(R5_LOCKED, &sh->dev[i].flags); in handle_failed_stripe()
3764 s->to_write = 0; in handle_failed_stripe()
3765 s->written = 0; in handle_failed_stripe()
3767 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_failed_stripe()
3768 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_failed_stripe()
3769 md_wakeup_thread(conf->mddev->thread); in handle_failed_stripe()
3773 handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, in handle_failed_sync() argument
3779 BUG_ON(sh->batch_head); in handle_failed_sync()
3780 clear_bit(STRIPE_SYNCING, &sh->state); in handle_failed_sync()
3781 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_failed_sync()
3782 wake_up(&conf->wait_for_overlap); in handle_failed_sync()
3783 s->syncing = 0; in handle_failed_sync()
3784 s->replacing = 0; in handle_failed_sync()
3790 * non-sync devices, or abort the recovery in handle_failed_sync()
3792 if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { in handle_failed_sync()
3797 for (i = 0; i < conf->raid_disks; i++) { in handle_failed_sync()
3798 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in handle_failed_sync()
3800 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3801 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3802 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3803 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3805 rdev = rcu_dereference(conf->disks[i].replacement); in handle_failed_sync()
3807 && !test_bit(Faulty, &rdev->flags) in handle_failed_sync()
3808 && !test_bit(In_sync, &rdev->flags) in handle_failed_sync()
3809 && !rdev_set_badblocks(rdev, sh->sector, in handle_failed_sync()
3810 RAID5_STRIPE_SECTORS(conf), 0)) in handle_failed_sync()
3815 conf->recovery_disabled = in handle_failed_sync()
3816 conf->mddev->recovery_disabled; in handle_failed_sync()
3818 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort); in handle_failed_sync()
3827 rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement); in want_replace()
3829 && !test_bit(Faulty, &rdev->flags) in want_replace()
3830 && !test_bit(In_sync, &rdev->flags) in want_replace()
3831 && (rdev->recovery_offset <= sh->sector in want_replace()
3832 || rdev->mddev->recovery_cp <= sh->sector)) in want_replace()
3841 struct r5dev *dev = &sh->dev[disk_idx]; in need_this_block()
3842 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], in need_this_block()
3843 &sh->dev[s->failed_num[1]] }; in need_this_block()
3845 bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW); in need_this_block()
3848 if (test_bit(R5_LOCKED, &dev->flags) || in need_this_block()
3849 test_bit(R5_UPTODATE, &dev->flags)) in need_this_block()
3855 if (dev->toread || in need_this_block()
3856 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))) in need_this_block()
3860 if (s->syncing || s->expanding || in need_this_block()
3861 (s->replacing && want_replace(sh, disk_idx))) in need_this_block()
3867 if ((s->failed >= 1 && fdev[0]->toread) || in need_this_block()
3868 (s->failed >= 2 && fdev[1]->toread)) in need_this_block()
3874 /* Sometimes neither read-modify-write nor reconstruct-write in need_this_block()
3876 * can. Then the parity-update is certain to have enough to in need_this_block()
3882 if (!s->failed || !s->to_write) in need_this_block()
3885 if (test_bit(R5_Insync, &dev->flags) && in need_this_block()
3886 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in need_this_block()
3887 /* Pre-reads are not permitted until after a short delay in need_this_block()
3894 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3895 if (fdev[i]->towrite && in need_this_block()
3896 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3897 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3905 if (s->failed >= 2 && in need_this_block()
3906 (fdev[i]->towrite || in need_this_block()
3907 s->failed_num[i] == sh->pd_idx || in need_this_block()
3908 s->failed_num[i] == sh->qd_idx) && in need_this_block()
3909 !test_bit(R5_UPTODATE, &fdev[i]->flags)) in need_this_block()
3912 * reconstruct-write. in need_this_block()
3917 /* If we are forced to do a reconstruct-write, because parity in need_this_block()
3925 sh->sector < sh->raid_conf->mddev->recovery_cp) in need_this_block()
3926 /* reconstruct-write isn't being forced */ in need_this_block()
3928 for (i = 0; i < s->failed && i < 2; i++) { in need_this_block()
3929 if (s->failed_num[i] != sh->pd_idx && in need_this_block()
3930 s->failed_num[i] != sh->qd_idx && in need_this_block()
3931 !test_bit(R5_UPTODATE, &fdev[i]->flags) && in need_this_block()
3932 !test_bit(R5_OVERWRITE, &fdev[i]->flags)) in need_this_block()
3939 /* fetch_block - checks the given member device to see if its data needs
3948 struct r5dev *dev = &sh->dev[disk_idx]; in fetch_block()
3955 BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); in fetch_block()
3956 BUG_ON(test_bit(R5_Wantread, &dev->flags)); in fetch_block()
3957 BUG_ON(sh->batch_head); in fetch_block()
3960 * In the raid6 case if the only non-uptodate disk is P in fetch_block()
3962 * drives. It is safe to compute rather than re-read P. in fetch_block()
3968 if ((s->uptodate == disks - 1) && in fetch_block()
3969 ((sh->qd_idx >= 0 && sh->pd_idx == disk_idx) || in fetch_block()
3970 (s->failed && (disk_idx == s->failed_num[0] || in fetch_block()
3971 disk_idx == s->failed_num[1])))) { in fetch_block()
3976 (unsigned long long)sh->sector, disk_idx); in fetch_block()
3977 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
3978 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
3979 set_bit(R5_Wantcompute, &dev->flags); in fetch_block()
3980 sh->ops.target = disk_idx; in fetch_block()
3981 sh->ops.target2 = -1; /* no 2nd target */ in fetch_block()
3982 s->req_compute = 1; in fetch_block()
3989 s->uptodate++; in fetch_block()
3991 } else if (s->uptodate == disks-2 && s->failed >= 2) { in fetch_block()
3992 /* Computing 2-failure is *very* expensive; only in fetch_block()
3996 for (other = disks; other--; ) { in fetch_block()
4000 &sh->dev[other].flags)) in fetch_block()
4005 (unsigned long long)sh->sector, in fetch_block()
4007 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in fetch_block()
4008 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in fetch_block()
4009 set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); in fetch_block()
4010 set_bit(R5_Wantcompute, &sh->dev[other].flags); in fetch_block()
4011 sh->ops.target = disk_idx; in fetch_block()
4012 sh->ops.target2 = other; in fetch_block()
4013 s->uptodate += 2; in fetch_block()
4014 s->req_compute = 1; in fetch_block()
4016 } else if (test_bit(R5_Insync, &dev->flags)) { in fetch_block()
4017 set_bit(R5_LOCKED, &dev->flags); in fetch_block()
4018 set_bit(R5_Wantread, &dev->flags); in fetch_block()
4019 s->locked++; in fetch_block()
4021 disk_idx, s->syncing); in fetch_block()
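/*
 * Editor's sketch (not raid5.c code): the single-failure compute path that
 * fetch_block() schedules is plain XOR reconstruction - the missing block
 * equals P XORed with every surviving data block. Hypothetical names.
 */
#include <stddef.h>
#include <stdint.h>

static void toy_rebuild_one(uint8_t *missing, const uint8_t *parity,
			    const uint8_t *const *survivors, int n, size_t len)
{
	size_t i;
	int d;

	for (i = 0; i < len; i++) {
		uint8_t v = parity[i];

		for (d = 0; d < n; d++)
			v ^= survivors[d][i];
		missing[i] = v;
	}
}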
4029 * handle_stripe_fill - read or compute data to satisfy pending requests.
4041 if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && in handle_stripe_fill()
4042 !sh->reconstruct_state) { in handle_stripe_fill()
4051 if (s->to_read && s->injournal && s->failed) { in handle_stripe_fill()
4052 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) in handle_stripe_fill()
4057 for (i = disks; i--; ) in handle_stripe_fill()
4062 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_fill()
4072 static void handle_stripe_clean_event(struct r5conf *conf, in handle_stripe_clean_event() argument
4081 for (i = disks; i--; ) in handle_stripe_clean_event()
4082 if (sh->dev[i].written) { in handle_stripe_clean_event()
4083 dev = &sh->dev[i]; in handle_stripe_clean_event()
4084 if (!test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_clean_event()
4085 (test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_clean_event()
4086 test_bit(R5_Discard, &dev->flags) || in handle_stripe_clean_event()
4087 test_bit(R5_SkipCopy, &dev->flags))) { in handle_stripe_clean_event()
4091 if (test_and_clear_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
4092 clear_bit(R5_UPTODATE, &dev->flags); in handle_stripe_clean_event()
4093 if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { in handle_stripe_clean_event()
4094 WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); in handle_stripe_clean_event()
4099 dev->page = dev->orig_page; in handle_stripe_clean_event()
4100 wbi = dev->written; in handle_stripe_clean_event()
4101 dev->written = NULL; in handle_stripe_clean_event()
4102 while (wbi && wbi->bi_iter.bi_sector < in handle_stripe_clean_event()
4103 dev->sector + RAID5_STRIPE_SECTORS(conf)) { in handle_stripe_clean_event()
4104 wbi2 = r5_next_bio(conf, wbi, dev->sector); in handle_stripe_clean_event()
4105 md_write_end(conf->mddev); in handle_stripe_clean_event()
4109 md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, in handle_stripe_clean_event()
4110 RAID5_STRIPE_SECTORS(conf), in handle_stripe_clean_event()
4111 !test_bit(STRIPE_DEGRADED, &sh->state), in handle_stripe_clean_event()
4113 if (head_sh->batch_head) { in handle_stripe_clean_event()
4114 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4118 dev = &sh->dev[i]; in handle_stripe_clean_event()
4123 dev = &sh->dev[i]; in handle_stripe_clean_event()
4124 } else if (test_bit(R5_Discard, &dev->flags)) in handle_stripe_clean_event()
4131 test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_clean_event()
4133 clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4134 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_stripe_clean_event()
4135 if (sh->qd_idx >= 0) { in handle_stripe_clean_event()
4136 clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4137 clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); in handle_stripe_clean_event()
4140 clear_bit(STRIPE_DISCARD, &sh->state); in handle_stripe_clean_event()
4147 hash = sh->hash_lock_index; in handle_stripe_clean_event()
4148 spin_lock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4150 spin_unlock_irq(conf->hash_locks + hash); in handle_stripe_clean_event()
4151 if (head_sh->batch_head) { in handle_stripe_clean_event()
4152 sh = list_first_entry(&sh->batch_list, in handle_stripe_clean_event()
4159 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) in handle_stripe_clean_event()
4160 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_clean_event()
4164 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) in handle_stripe_clean_event()
4165 if (atomic_dec_and_test(&conf->pending_full_writes)) in handle_stripe_clean_event()
4166 md_wakeup_thread(conf->mddev->thread); in handle_stripe_clean_event()
4168 if (head_sh->batch_head && do_endio) in handle_stripe_clean_event()
4174 * old data. This page is stored in dev->orig_page.
4182 return (test_bit(R5_UPTODATE, &dev->flags)) && in uptodate_for_rmw()
4183 (!test_bit(R5_InJournal, &dev->flags) || in uptodate_for_rmw()
4184 test_bit(R5_OrigPageUPTDODATE, &dev->flags)); in uptodate_for_rmw()
4187 static int handle_stripe_dirtying(struct r5conf *conf, in handle_stripe_dirtying() argument
4193 sector_t recovery_cp = conf->mddev->recovery_cp; in handle_stripe_dirtying()
4198 * In this case, we need to always do reconstruct-write, to ensure in handle_stripe_dirtying()
4199 * that in case of drive failure or read-error correction, we in handle_stripe_dirtying()
4202 if (conf->rmw_level == PARITY_DISABLE_RMW || in handle_stripe_dirtying()
4203 (recovery_cp < MaxSector && sh->sector >= recovery_cp && in handle_stripe_dirtying()
4204 s->failed == 0)) { in handle_stripe_dirtying()
4205 /* Calculate the real rcw later - for now make it in handle_stripe_dirtying()
4209 pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n", in handle_stripe_dirtying()
4210 conf->rmw_level, (unsigned long long)recovery_cp, in handle_stripe_dirtying()
4211 (unsigned long long)sh->sector); in handle_stripe_dirtying()
4212 } else for (i = disks; i--; ) { in handle_stripe_dirtying()
4214 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4215 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4216 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4217 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4218 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4220 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4221 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4227 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4228 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4229 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4230 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4231 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4232 if (test_bit(R5_Insync, &dev->flags)) in handle_stripe_dirtying()
4240 (unsigned long long)sh->sector, sh->state, rmw, rcw); in handle_stripe_dirtying()
4241 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe_dirtying()
4242 if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) { in handle_stripe_dirtying()
4243 /* prefer read-modify-write, but need to get some data */ in handle_stripe_dirtying()
4244 if (conf->mddev->queue) in handle_stripe_dirtying()
4245 blk_add_trace_msg(conf->mddev->queue, in handle_stripe_dirtying()
4247 (unsigned long long)sh->sector, rmw); in handle_stripe_dirtying()
4248 for (i = disks; i--; ) { in handle_stripe_dirtying()
4249 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4250 if (test_bit(R5_InJournal, &dev->flags) && in handle_stripe_dirtying()
4251 dev->page == dev->orig_page && in handle_stripe_dirtying()
4252 !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { in handle_stripe_dirtying()
4257 dev->orig_page = p; in handle_stripe_dirtying()
4263 * disk_info->extra_page in handle_stripe_dirtying()
4266 &conf->cache_state)) { in handle_stripe_dirtying()
4272 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4273 s->waiting_extra_page = 1; in handle_stripe_dirtying()
4274 return -EAGAIN; in handle_stripe_dirtying()
4278 for (i = disks; i--; ) { in handle_stripe_dirtying()
4279 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4280 if (((dev->towrite && !delay_towrite(conf, dev, s)) || in handle_stripe_dirtying()
4281 i == sh->pd_idx || i == sh->qd_idx || in handle_stripe_dirtying()
4282 test_bit(R5_InJournal, &dev->flags)) && in handle_stripe_dirtying()
4283 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4285 test_bit(R5_Wantcompute, &dev->flags)) && in handle_stripe_dirtying()
4286 test_bit(R5_Insync, &dev->flags)) { in handle_stripe_dirtying()
4288 &sh->state)) { in handle_stripe_dirtying()
4289 pr_debug("Read_old block %d for r-m-w\n", in handle_stripe_dirtying()
4291 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4292 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4293 s->locked++; in handle_stripe_dirtying()
4295 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4299 if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) { in handle_stripe_dirtying()
4303 for (i = disks; i--; ) { in handle_stripe_dirtying()
4304 struct r5dev *dev = &sh->dev[i]; in handle_stripe_dirtying()
4305 if (!test_bit(R5_OVERWRITE, &dev->flags) && in handle_stripe_dirtying()
4306 i != sh->pd_idx && i != sh->qd_idx && in handle_stripe_dirtying()
4307 !test_bit(R5_LOCKED, &dev->flags) && in handle_stripe_dirtying()
4308 !(test_bit(R5_UPTODATE, &dev->flags) || in handle_stripe_dirtying()
4309 test_bit(R5_Wantcompute, &dev->flags))) { in handle_stripe_dirtying()
4311 if (test_bit(R5_Insync, &dev->flags) && in handle_stripe_dirtying()
4313 &sh->state)) { in handle_stripe_dirtying()
4316 set_bit(R5_LOCKED, &dev->flags); in handle_stripe_dirtying()
4317 set_bit(R5_Wantread, &dev->flags); in handle_stripe_dirtying()
4318 s->locked++; in handle_stripe_dirtying()
4321 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4324 if (rcw && conf->mddev->queue) in handle_stripe_dirtying()
4325 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", in handle_stripe_dirtying()
4326 (unsigned long long)sh->sector, in handle_stripe_dirtying()
4327 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); in handle_stripe_dirtying()
4331 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe_dirtying()
4332 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe_dirtying()
4344 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && in handle_stripe_dirtying()
4345 (s->locked == 0 && (rcw == 0 || rmw == 0) && in handle_stripe_dirtying()
4346 !test_bit(STRIPE_BIT_DELAY, &sh->state))) in handle_stripe_dirtying()
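/*
 * Editor's sketch (not raid5.c code): to first order, the rmw/rcw counts
 * above compare how many blocks each strategy must read on a RAID-5
 * stripe - read-modify-write reads the old copies of the blocks being
 * rewritten plus the old parity, while reconstruct-write reads every data
 * block that is not being rewritten. Hypothetical simplification:
 */
static int toy_prefer_rmw(int data_disks, int blocks_to_write)
{
	int rmw = blocks_to_write + 1;			/* old data + old parity */
	int rcw = data_disks - blocks_to_write;		/* the untouched data */

	return rmw < rcw;
}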
4351 static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks5() argument
4356 BUG_ON(sh->batch_head); in handle_parity_checks5()
4357 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks5()
4359 switch (sh->check_state) { in handle_parity_checks5()
4362 if (s->failed == 0) { in handle_parity_checks5()
4363 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4364 sh->check_state = check_state_run; in handle_parity_checks5()
4365 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks5()
4366 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4367 s->uptodate--; in handle_parity_checks5()
4370 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks5()
4373 sh->check_state = check_state_idle; in handle_parity_checks5()
4375 dev = &sh->dev[sh->pd_idx]; in handle_parity_checks5()
4378 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks5()
4382 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); in handle_parity_checks5()
4383 BUG_ON(s->uptodate != disks); in handle_parity_checks5()
4385 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks5()
4386 s->locked++; in handle_parity_checks5()
4387 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks5()
4389 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks5()
4390 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4395 sh->check_state = check_state_idle; in handle_parity_checks5()
4400 if (s->failed) in handle_parity_checks5()
4407 if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) in handle_parity_checks5()
4411 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4413 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks5()
4414 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks5()
4416 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks5()
4418 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks5()
4419 (unsigned long long) sh->sector, in handle_parity_checks5()
4420 (unsigned long long) sh->sector + in handle_parity_checks5()
4421 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks5()
4423 sh->check_state = check_state_compute_run; in handle_parity_checks5()
4424 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks5()
4425 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks5()
4427 &sh->dev[sh->pd_idx].flags); in handle_parity_checks5()
4428 sh->ops.target = sh->pd_idx; in handle_parity_checks5()
4429 sh->ops.target2 = -1; in handle_parity_checks5()
4430 s->uptodate++; in handle_parity_checks5()
4438 __func__, sh->check_state, in handle_parity_checks5()
4439 (unsigned long long) sh->sector); in handle_parity_checks5()
4444 static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, in handle_parity_checks6() argument
4448 int pd_idx = sh->pd_idx; in handle_parity_checks6()
4449 int qd_idx = sh->qd_idx; in handle_parity_checks6()
4452 BUG_ON(sh->batch_head); in handle_parity_checks6()
4453 set_bit(STRIPE_HANDLE, &sh->state); in handle_parity_checks6()
4455 BUG_ON(s->failed > 2); in handle_parity_checks6()
4463 switch (sh->check_state) { in handle_parity_checks6()
4466 if (s->failed == s->q_failed) { in handle_parity_checks6()
4471 sh->check_state = check_state_run; in handle_parity_checks6()
4473 if (!s->q_failed && s->failed < 2) { in handle_parity_checks6()
4477 if (sh->check_state == check_state_run) in handle_parity_checks6()
4478 sh->check_state = check_state_run_pq; in handle_parity_checks6()
4480 sh->check_state = check_state_run_q; in handle_parity_checks6()
4484 sh->ops.zero_sum_result = 0; in handle_parity_checks6()
4486 if (sh->check_state == check_state_run) { in handle_parity_checks6()
4488 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); in handle_parity_checks6()
4489 s->uptodate--; in handle_parity_checks6()
4491 if (sh->check_state >= check_state_run && in handle_parity_checks6()
4492 sh->check_state <= check_state_run_pq) { in handle_parity_checks6()
4496 set_bit(STRIPE_OP_CHECK, &s->ops_request); in handle_parity_checks6()
4500 /* we have a 2-disk failure */ in handle_parity_checks6()
4501 BUG_ON(s->failed != 2); in handle_parity_checks6()
4504 sh->check_state = check_state_idle; in handle_parity_checks6()
4507 if (test_bit(STRIPE_INSYNC, &sh->state)) in handle_parity_checks6()
4514 if (s->failed == 2) { in handle_parity_checks6()
4515 dev = &sh->dev[s->failed_num[1]]; in handle_parity_checks6()
4516 s->locked++; in handle_parity_checks6()
4517 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4518 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4520 if (s->failed >= 1) { in handle_parity_checks6()
4521 dev = &sh->dev[s->failed_num[0]]; in handle_parity_checks6()
4522 s->locked++; in handle_parity_checks6()
4523 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4524 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4526 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4527 dev = &sh->dev[pd_idx]; in handle_parity_checks6()
4528 s->locked++; in handle_parity_checks6()
4529 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4530 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4532 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4533 dev = &sh->dev[qd_idx]; in handle_parity_checks6()
4534 s->locked++; in handle_parity_checks6()
4535 set_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4536 set_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4538 if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags), in handle_parity_checks6()
4540 mdname(conf->mddev), in handle_parity_checks6()
4541 dev - (struct r5dev *) &sh->dev)) { in handle_parity_checks6()
4542 clear_bit(R5_LOCKED, &dev->flags); in handle_parity_checks6()
4543 clear_bit(R5_Wantwrite, &dev->flags); in handle_parity_checks6()
4544 s->locked--; in handle_parity_checks6()
4546 clear_bit(STRIPE_DEGRADED, &sh->state); in handle_parity_checks6()
4548 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4555 sh->check_state = check_state_idle; in handle_parity_checks6()
4561 if (sh->ops.zero_sum_result == 0) { in handle_parity_checks6()
4563 if (!s->failed) in handle_parity_checks6()
4564 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4570 sh->check_state = check_state_compute_result; in handle_parity_checks6()
4578 atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); in handle_parity_checks6()
4579 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { in handle_parity_checks6()
4581 set_bit(STRIPE_INSYNC, &sh->state); in handle_parity_checks6()
4583 "%llu-%llu\n", mdname(conf->mddev), in handle_parity_checks6()
4584 (unsigned long long) sh->sector, in handle_parity_checks6()
4585 (unsigned long long) sh->sector + in handle_parity_checks6()
4586 RAID5_STRIPE_SECTORS(conf)); in handle_parity_checks6()
4588 int *target = &sh->ops.target; in handle_parity_checks6()
4590 sh->ops.target = -1; in handle_parity_checks6()
4591 sh->ops.target2 = -1; in handle_parity_checks6()
4592 sh->check_state = check_state_compute_run; in handle_parity_checks6()
4593 set_bit(STRIPE_COMPUTE_RUN, &sh->state); in handle_parity_checks6()
4594 set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); in handle_parity_checks6()
4595 if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { in handle_parity_checks6()
4597 &sh->dev[pd_idx].flags); in handle_parity_checks6()
4599 target = &sh->ops.target2; in handle_parity_checks6()
4600 s->uptodate++; in handle_parity_checks6()
4602 if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { in handle_parity_checks6()
4604 &sh->dev[qd_idx].flags); in handle_parity_checks6()
4606 s->uptodate++; in handle_parity_checks6()
4615 __func__, sh->check_state, in handle_parity_checks6()
4616 (unsigned long long) sh->sector); in handle_parity_checks6()
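
/*
 * What the check-state machine above is deciding: a scrub recomputes P and
 * Q from the data blocks and compares them with what is on disk; the
 * SUM_CHECK_P_RESULT / SUM_CHECK_Q_RESULT bits in sh->ops.zero_sum_result
 * record which comparison failed, and the failure pattern picks the compute
 * target(s).  Below is a minimal user-space sketch of that verification for
 * one stripe (P is the XOR of the data, Q the GF(2^8) syndrome with
 * generator 2); NDATA, BLK and the helper names are illustrative, not the
 * driver's.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NDATA 4			/* data disks in this sketch */
#define BLK   16		/* bytes per block */

/* multiply by 2 in GF(2^8) with the RAID-6 polynomial 0x11d */
static uint8_t gf2_mul2(uint8_t v)
{
	return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

static void compute_pq(uint8_t data[NDATA][BLK], uint8_t *p, uint8_t *q)
{
	memset(p, 0, BLK);
	memset(q, 0, BLK);
	/* Horner evaluation: q = d[N-1]*2^(N-1) + ... + d[1]*2 + d[0] */
	for (int d = NDATA - 1; d >= 0; d--)
		for (int i = 0; i < BLK; i++) {
			p[i] ^= data[d][i];
			q[i] = gf2_mul2(q[i]) ^ data[d][i];
		}
}

int main(void)
{
	uint8_t data[NDATA][BLK], p[BLK], q[BLK], cp[BLK], cq[BLK];

	for (int d = 0; d < NDATA; d++)
		for (int i = 0; i < BLK; i++)
			data[d][i] = (uint8_t)(d * 37 + i);
	compute_pq(data, p, q);		/* parity as written to disk */

	data[2][5] ^= 0xff;		/* silent corruption in one data block */
	compute_pq(data, cp, cq);	/* what the check recomputes */

	int p_bad = memcmp(p, cp, BLK) != 0;	/* ~SUM_CHECK_P_RESULT */
	int q_bad = memcmp(q, cq, BLK) != 0;	/* ~SUM_CHECK_Q_RESULT */
	printf("P %s, Q %s\n", p_bad ? "bad" : "ok", q_bad ? "bad" : "ok");
	return 0;
}
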
4621 static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) in handle_stripe_expansion() argument
4628 struct dma_async_tx_descriptor *tx = NULL; in handle_stripe_expansion() local
4629 BUG_ON(sh->batch_head); in handle_stripe_expansion()
4630 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); in handle_stripe_expansion()
4631 for (i = 0; i < sh->disks; i++) in handle_stripe_expansion()
4632 if (i != sh->pd_idx && i != sh->qd_idx) { in handle_stripe_expansion()
4638 sector_t s = raid5_compute_sector(conf, bn, 0, in handle_stripe_expansion()
4640 sh2 = raid5_get_active_stripe(conf, NULL, s, in handle_stripe_expansion()
4648 if (!test_bit(STRIPE_EXPANDING, &sh2->state) || in handle_stripe_expansion()
4649 test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { in handle_stripe_expansion()
4656 init_async_submit(&submit, 0, tx, NULL, NULL, NULL); in handle_stripe_expansion()
4657 tx = async_memcpy(sh2->dev[dd_idx].page, in handle_stripe_expansion()
4658 sh->dev[i].page, sh2->dev[dd_idx].offset, in handle_stripe_expansion()
4659 sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), in handle_stripe_expansion()
4662 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4663 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); in handle_stripe_expansion()
4664 for (j = 0; j < conf->raid_disks; j++) in handle_stripe_expansion()
4665 if (j != sh2->pd_idx && in handle_stripe_expansion()
4666 j != sh2->qd_idx && in handle_stripe_expansion()
4667 !test_bit(R5_Expanded, &sh2->dev[j].flags)) in handle_stripe_expansion()
4669 if (j == conf->raid_disks) { in handle_stripe_expansion()
4670 set_bit(STRIPE_EXPAND_READY, &sh2->state); in handle_stripe_expansion()
4671 set_bit(STRIPE_HANDLE, &sh2->state); in handle_stripe_expansion()
4677 async_tx_quiesce(&tx); in handle_stripe_expansion()
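
/*
 * The expansion copy above is, at heart, a remap: each data block's logical
 * block number is recomputed into a (stripe, device) slot under the new
 * geometry via raid5_compute_sector(), then copied across and marked
 * R5_Expanded / R5_UPTODATE.  A user-space sketch of the idea with a toy
 * rotating-parity RAID-5 layout; map_block() is illustrative and far
 * simpler than the driver's layout code.
 */
#include <stdio.h>

struct loc { long stripe; int disk; };

static struct loc map_block(long bn, int raid_disks)
{
	int data_disks = raid_disks - 1;
	struct loc l;
	int pd;

	l.stripe = bn / data_disks;
	pd = raid_disks - 1 - (int)(l.stripe % raid_disks);	/* parity slot */
	l.disk = (int)(bn % data_disks);
	if (l.disk >= pd)		/* data skips over the parity slot */
		l.disk++;
	return l;
}

int main(void)
{
	/* grow a 4-disk RAID-5 to 5 disks: watch blocks change homes */
	for (long bn = 0; bn < 8; bn++) {
		struct loc o = map_block(bn, 4), n = map_block(bn, 5);

		printf("block %ld: (stripe %ld, disk %d) -> (stripe %ld, disk %d)\n",
		       bn, o.stripe, o.disk, n.stripe, n.disk);
	}
	return 0;
}
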
4681 * handle_stripe - analyse a stripe's state and perform whatever work it needs. in handle_stripe()
4696 struct r5conf *conf = sh->raid_conf; in analyse_stripe() local
4697 int disks = sh->disks; in analyse_stripe()
4704 s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head; in analyse_stripe()
4705 s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head; in analyse_stripe()
4706 s->failed_num[0] = -1; in analyse_stripe()
4707 s->failed_num[1] = -1; in analyse_stripe()
4708 s->log_failed = r5l_log_disk_error(conf); in analyse_stripe()
4712 for (i=disks; i--; ) { in analyse_stripe()
4718 dev = &sh->dev[i]; in analyse_stripe()
4721 i, dev->flags, in analyse_stripe()
4722 dev->toread, dev->towrite, dev->written); in analyse_stripe()
4728 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && in analyse_stripe()
4729 !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) in analyse_stripe()
4730 set_bit(R5_Wantfill, &dev->flags); in analyse_stripe()
4733 if (test_bit(R5_LOCKED, &dev->flags)) in analyse_stripe()
4734 s->locked++; in analyse_stripe()
4735 if (test_bit(R5_UPTODATE, &dev->flags)) in analyse_stripe()
4736 s->uptodate++; in analyse_stripe()
4737 if (test_bit(R5_Wantcompute, &dev->flags)) { in analyse_stripe()
4738 s->compute++; in analyse_stripe()
4739 BUG_ON(s->compute > 2); in analyse_stripe()
4742 if (test_bit(R5_Wantfill, &dev->flags)) in analyse_stripe()
4743 s->to_fill++; in analyse_stripe()
4744 else if (dev->toread) in analyse_stripe()
4745 s->to_read++; in analyse_stripe()
4746 if (dev->towrite) { in analyse_stripe()
4747 s->to_write++; in analyse_stripe()
4748 if (!test_bit(R5_OVERWRITE, &dev->flags)) in analyse_stripe()
4749 s->non_overwrite++; in analyse_stripe()
4751 if (dev->written) in analyse_stripe()
4752 s->written++; in analyse_stripe()
4756 rdev = rcu_dereference(conf->disks[i].replacement); in analyse_stripe()
4757 if (rdev && !test_bit(Faulty, &rdev->flags) && in analyse_stripe()
4758 rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && in analyse_stripe()
4759 !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4761 set_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4763 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4764 set_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4766 clear_bit(R5_NeedReplace, &dev->flags); in analyse_stripe()
4767 rdev = rcu_dereference(conf->disks[i].rdev); in analyse_stripe()
4768 clear_bit(R5_ReadRepl, &dev->flags); in analyse_stripe()
4770 if (rdev && test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4773 is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), in analyse_stripe()
4775 if (s->blocked_rdev == NULL in analyse_stripe()
4776 && (test_bit(Blocked, &rdev->flags) in analyse_stripe()
4780 &rdev->flags); in analyse_stripe()
4781 s->blocked_rdev = rdev; in analyse_stripe()
4782 atomic_inc(&rdev->nr_pending); in analyse_stripe()
4785 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4787 /* Not in-sync */; in analyse_stripe()
4789 /* also not in-sync */ in analyse_stripe()
4790 if (!test_bit(WriteErrorSeen, &rdev->flags) && in analyse_stripe()
4791 test_bit(R5_UPTODATE, &dev->flags)) { in analyse_stripe()
4792 /* treat as in-sync, but with a read error we can now try to correct */ in analyse_stripe()
4795 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4796 set_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4798 } else if (test_bit(In_sync, &rdev->flags)) in analyse_stripe()
4799 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4800 else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) in analyse_stripe()
4802 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4803 else if (test_bit(R5_UPTODATE, &dev->flags) && in analyse_stripe()
4804 test_bit(R5_Expanded, &dev->flags)) in analyse_stripe()
4809 set_bit(R5_Insync, &dev->flags); in analyse_stripe()
4811 if (test_bit(R5_WriteError, &dev->flags)) { in analyse_stripe()
4815 conf->disks[i].rdev); in analyse_stripe()
4817 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4818 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4819 s->handle_bad_blocks = 1; in analyse_stripe()
4820 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4822 clear_bit(R5_WriteError, &dev->flags); in analyse_stripe()
4824 if (test_bit(R5_MadeGood, &dev->flags)) { in analyse_stripe()
4828 conf->disks[i].rdev); in analyse_stripe()
4829 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4830 s->handle_bad_blocks = 1; in analyse_stripe()
4831 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4833 clear_bit(R5_MadeGood, &dev->flags); in analyse_stripe()
4835 if (test_bit(R5_MadeGoodRepl, &dev->flags)) { in analyse_stripe()
4837 conf->disks[i].replacement); in analyse_stripe()
4838 if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { in analyse_stripe()
4839 s->handle_bad_blocks = 1; in analyse_stripe()
4840 atomic_inc(&rdev2->nr_pending); in analyse_stripe()
4842 clear_bit(R5_MadeGoodRepl, &dev->flags); in analyse_stripe()
4844 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4846 clear_bit(R5_ReadError, &dev->flags); in analyse_stripe()
4847 clear_bit(R5_ReWrite, &dev->flags); in analyse_stripe()
4849 if (test_bit(R5_ReadError, &dev->flags)) in analyse_stripe()
4850 clear_bit(R5_Insync, &dev->flags); in analyse_stripe()
4851 if (!test_bit(R5_Insync, &dev->flags)) { in analyse_stripe()
4852 if (s->failed < 2) in analyse_stripe()
4853 s->failed_num[s->failed] = i; in analyse_stripe()
4854 s->failed++; in analyse_stripe()
4855 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4859 conf->disks[i].replacement); in analyse_stripe()
4860 if (rdev && !test_bit(Faulty, &rdev->flags)) in analyse_stripe()
4865 if (test_bit(R5_InJournal, &dev->flags)) in analyse_stripe()
4866 s->injournal++; in analyse_stripe()
4867 if (test_bit(R5_InJournal, &dev->flags) && dev->written) in analyse_stripe()
4868 s->just_cached++; in analyse_stripe()
4870 if (test_bit(STRIPE_SYNCING, &sh->state)) { in analyse_stripe()
4880 sh->sector >= conf->mddev->recovery_cp || in analyse_stripe()
4881 test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) in analyse_stripe()
4882 s->syncing = 1; in analyse_stripe()
4884 s->replacing = 1; in analyse_stripe()
4896 if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) in clear_batch_ready()
4897 return (sh->batch_head && sh->batch_head != sh); in clear_batch_ready()
4898 spin_lock(&sh->stripe_lock); in clear_batch_ready()
4899 if (!sh->batch_head) { in clear_batch_ready()
4900 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4908 if (sh->batch_head != sh) { in clear_batch_ready()
4909 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4912 spin_lock(&sh->batch_lock); in clear_batch_ready()
4913 list_for_each_entry(tmp, &sh->batch_list, batch_list) in clear_batch_ready()
4914 clear_bit(STRIPE_BATCH_READY, &tmp->state); in clear_batch_ready()
4915 spin_unlock(&sh->batch_lock); in clear_batch_ready()
4916 spin_unlock(&sh->stripe_lock); in clear_batch_ready()
4932 list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { in break_stripe_batch_list()
4934 list_del_init(&sh->batch_list); in break_stripe_batch_list()
4936 WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | in break_stripe_batch_list()
4948 "stripe state: %lx\n", sh->state); in break_stripe_batch_list()
4949 WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | in break_stripe_batch_list()
4951 "head stripe state: %lx\n", head_sh->state); in break_stripe_batch_list()
4953 set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | in break_stripe_batch_list()
4957 head_sh->state & (1 << STRIPE_INSYNC)); in break_stripe_batch_list()
4959 sh->check_state = head_sh->check_state; in break_stripe_batch_list()
4960 sh->reconstruct_state = head_sh->reconstruct_state; in break_stripe_batch_list()
4961 spin_lock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4962 sh->batch_head = NULL; in break_stripe_batch_list()
4963 spin_unlock_irq(&sh->stripe_lock); in break_stripe_batch_list()
4964 for (i = 0; i < sh->disks; i++) { in break_stripe_batch_list()
4965 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) in break_stripe_batch_list()
4967 sh->dev[i].flags = head_sh->dev[i].flags & in break_stripe_batch_list()
4971 sh->state & handle_flags) in break_stripe_batch_list()
4972 set_bit(STRIPE_HANDLE, &sh->state); in break_stripe_batch_list()
4975 spin_lock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4976 head_sh->batch_head = NULL; in break_stripe_batch_list()
4977 spin_unlock_irq(&head_sh->stripe_lock); in break_stripe_batch_list()
4978 for (i = 0; i < head_sh->disks; i++) in break_stripe_batch_list()
4979 if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) in break_stripe_batch_list()
4981 if (head_sh->state & handle_flags) in break_stripe_batch_list()
4982 set_bit(STRIPE_HANDLE, &head_sh->state); in break_stripe_batch_list()
4985 wake_up(&head_sh->raid_conf->wait_for_overlap); in break_stripe_batch_list()
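
/*
 * break_stripe_batch_list() deletes each stripe from the batch while still
 * walking it, which is exactly why it uses the _safe iterator: the next
 * pointer is saved before the current node is unlinked.  The same pattern
 * in plain user-space C (singly linked here for brevity; the kernel list
 * is a doubly linked intrusive list):
 */
#include <stdio.h>
#include <stdlib.h>

struct stripe {
	int id;
	struct stripe *next;
};

int main(void)
{
	struct stripe *head = NULL;

	for (int i = 3; i >= 1; i--) {
		struct stripe *sh = malloc(sizeof(*sh));

		if (!sh)
			return 1;
		sh->id = i;
		sh->next = head;
		head = sh;
	}

	/* "_safe" walk: grab the successor before detaching and freeing */
	for (struct stripe *sh = head, *next; sh; sh = next) {
		next = sh->next;
		printf("unbatch stripe %d\n", sh->id);
		sh->next = NULL;	/* ~list_del_init(&sh->batch_list) */
		free(sh);
	}
	return 0;
}
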
4991 struct r5conf *conf = sh->raid_conf; in handle_stripe() local
4994 int disks = sh->disks; in handle_stripe()
4997 clear_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5008 if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { in handle_stripe()
5011 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5015 if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) in handle_stripe()
5018 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { in handle_stripe()
5019 spin_lock(&sh->stripe_lock); in handle_stripe()
5024 if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && in handle_stripe()
5025 !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && in handle_stripe()
5026 !test_bit(STRIPE_DISCARD, &sh->state) && in handle_stripe()
5027 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { in handle_stripe()
5028 set_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
5029 clear_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5030 clear_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
5032 spin_unlock(&sh->stripe_lock); in handle_stripe()
5034 clear_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
5038 (unsigned long long)sh->sector, sh->state, in handle_stripe()
5039 atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, in handle_stripe()
5040 sh->check_state, sh->reconstruct_state); in handle_stripe()
5044 if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) in handle_stripe()
5048 test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) { in handle_stripe()
5049 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5056 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5060 rdev_dec_pending(s.blocked_rdev, conf->mddev); in handle_stripe()
5064 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { in handle_stripe()
5066 set_bit(STRIPE_BIOFILL_RUN, &sh->state); in handle_stripe()
5080 if (s.failed > conf->max_degraded || in handle_stripe()
5082 sh->check_state = 0; in handle_stripe()
5083 sh->reconstruct_state = 0; in handle_stripe()
5086 handle_failed_stripe(conf, sh, &s, disks); in handle_stripe()
5088 handle_failed_sync(conf, sh, &s); in handle_stripe()
5095 if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) in handle_stripe()
5097 if (sh->reconstruct_state == reconstruct_state_drain_result || in handle_stripe()
5098 sh->reconstruct_state == reconstruct_state_prexor_drain_result) { in handle_stripe()
5099 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
5104 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags) && in handle_stripe()
5105 !test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)); in handle_stripe()
5106 BUG_ON(sh->qd_idx >= 0 && in handle_stripe()
5107 !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags) && in handle_stripe()
5108 !test_bit(R5_Discard, &sh->dev[sh->qd_idx].flags)); in handle_stripe()
5109 for (i = disks; i--; ) { in handle_stripe()
5110 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
5111 if (test_bit(R5_LOCKED, &dev->flags) && in handle_stripe()
5112 (i == sh->pd_idx || i == sh->qd_idx || in handle_stripe()
5113 dev->written || test_bit(R5_InJournal, in handle_stripe()
5114 &dev->flags))) { in handle_stripe()
5116 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5121 if (!test_bit(R5_Insync, &dev->flags) || in handle_stripe()
5122 ((i == sh->pd_idx || i == sh->qd_idx) && in handle_stripe()
5124 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5127 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in handle_stripe()
5135 pdev = &sh->dev[sh->pd_idx]; in handle_stripe()
5136 s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) in handle_stripe()
5137 || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); in handle_stripe()
5138 qdev = &sh->dev[sh->qd_idx]; in handle_stripe()
5139 s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) in handle_stripe()
5140 || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) in handle_stripe()
5141 || conf->level < 6; in handle_stripe()
5144 (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) in handle_stripe()
5145 && !test_bit(R5_LOCKED, &pdev->flags) in handle_stripe()
5146 && (test_bit(R5_UPTODATE, &pdev->flags) || in handle_stripe()
5147 test_bit(R5_Discard, &pdev->flags))))) && in handle_stripe()
5148 (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) in handle_stripe()
5149 && !test_bit(R5_LOCKED, &qdev->flags) in handle_stripe()
5150 && (test_bit(R5_UPTODATE, &qdev->flags) || in handle_stripe()
5151 test_bit(R5_Discard, &qdev->flags)))))) in handle_stripe()
5152 handle_stripe_clean_event(conf, sh, disks); in handle_stripe()
5155 r5c_handle_cached_data_endio(conf, sh, disks); in handle_stripe()
5174 r5c_finish_stripe_write_out(conf, sh, &s); in handle_stripe()
5185 if (!sh->reconstruct_state && !sh->check_state && !sh->log_io) { in handle_stripe()
5186 if (!r5c_is_writeback(conf->log)) { in handle_stripe()
5188 handle_stripe_dirtying(conf, sh, &s, disks); in handle_stripe()
5194 ret = r5c_try_caching_write(conf, sh, &s, in handle_stripe()
5197 * If caching phase failed: ret == -EAGAIN in handle_stripe()
5203 if (ret == -EAGAIN || in handle_stripe()
5205 (!test_bit(STRIPE_R5C_CACHING, &sh->state) && in handle_stripe()
5207 ret = handle_stripe_dirtying(conf, sh, &s, in handle_stripe()
5209 if (ret == -EAGAIN) in handle_stripe()
5220 if (sh->check_state || in handle_stripe()
5222 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5223 !test_bit(STRIPE_INSYNC, &sh->state))) { in handle_stripe()
5224 if (conf->level == 6) in handle_stripe()
5225 handle_parity_checks6(conf, sh, &s, disks); in handle_stripe()
5227 handle_parity_checks5(conf, sh, &s, disks); in handle_stripe()
5231 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) in handle_stripe()
5232 && !test_bit(STRIPE_REPLACED, &sh->state)) { in handle_stripe()
5234 for (i = 0; i < conf->raid_disks; i++) in handle_stripe()
5235 if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) { in handle_stripe()
5236 WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags)); in handle_stripe()
5237 set_bit(R5_WantReplace, &sh->dev[i].flags); in handle_stripe()
5238 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5242 set_bit(STRIPE_INSYNC, &sh->state); in handle_stripe()
5243 set_bit(STRIPE_REPLACED, &sh->state); in handle_stripe()
5246 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && in handle_stripe()
5247 test_bit(STRIPE_INSYNC, &sh->state)) { in handle_stripe()
5248 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5249 clear_bit(STRIPE_SYNCING, &sh->state); in handle_stripe()
5250 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) in handle_stripe()
5251 wake_up(&conf->wait_for_overlap); in handle_stripe()
5257 if (s.failed <= conf->max_degraded && !conf->mddev->ro) in handle_stripe()
5259 struct r5dev *dev = &sh->dev[s.failed_num[i]]; in handle_stripe()
5260 if (test_bit(R5_ReadError, &dev->flags) in handle_stripe()
5261 && !test_bit(R5_LOCKED, &dev->flags) in handle_stripe()
5262 && test_bit(R5_UPTODATE, &dev->flags) in handle_stripe()
5264 if (!test_bit(R5_ReWrite, &dev->flags)) { in handle_stripe()
5265 set_bit(R5_Wantwrite, &dev->flags); in handle_stripe()
5266 set_bit(R5_ReWrite, &dev->flags); in handle_stripe()
5269 set_bit(R5_Wantread, &dev->flags); in handle_stripe()
5270 set_bit(R5_LOCKED, &dev->flags); in handle_stripe()
5276 if (sh->reconstruct_state == reconstruct_state_result) { in handle_stripe()
5278 = raid5_get_active_stripe(conf, NULL, sh->sector, in handle_stripe()
5281 if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { in handle_stripe()
5285 set_bit(STRIPE_DELAYED, &sh->state); in handle_stripe()
5286 set_bit(STRIPE_HANDLE, &sh->state); in handle_stripe()
5288 &sh_src->state)) in handle_stripe()
5289 atomic_inc(&conf->preread_active_stripes); in handle_stripe()
5296 sh->reconstruct_state = reconstruct_state_idle; in handle_stripe()
5297 clear_bit(STRIPE_EXPANDING, &sh->state); in handle_stripe()
5298 for (i = conf->raid_disks; i--; ) { in handle_stripe()
5299 set_bit(R5_Wantwrite, &sh->dev[i].flags); in handle_stripe()
5300 set_bit(R5_LOCKED, &sh->dev[i].flags); in handle_stripe()
5305 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && in handle_stripe()
5306 !sh->reconstruct_state) { in handle_stripe()
5308 sh->disks = conf->raid_disks; in handle_stripe()
5309 stripe_set_idx(sh->sector, conf, 0, sh); in handle_stripe()
5311 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { in handle_stripe()
5312 clear_bit(STRIPE_EXPAND_READY, &sh->state); in handle_stripe()
5313 atomic_dec(&conf->reshape_stripes); in handle_stripe()
5314 wake_up(&conf->wait_for_overlap); in handle_stripe()
5315 md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); in handle_stripe()
5319 !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) in handle_stripe()
5320 handle_stripe_expansion(conf, sh); in handle_stripe()
5325 if (conf->mddev->external) in handle_stripe()
5327 conf->mddev); in handle_stripe()
5334 conf->mddev); in handle_stripe()
5338 for (i = disks; i--; ) { in handle_stripe()
5340 struct r5dev *dev = &sh->dev[i]; in handle_stripe()
5341 if (test_and_clear_bit(R5_WriteError, &dev->flags)) { in handle_stripe()
5343 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5344 if (!rdev_set_badblocks(rdev, sh->sector, in handle_stripe()
5345 RAID5_STRIPE_SECTORS(conf), 0)) in handle_stripe()
5346 md_error(conf->mddev, rdev); in handle_stripe()
5347 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5349 if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { in handle_stripe()
5350 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5351 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5352 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5353 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5355 if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { in handle_stripe()
5356 rdev = rdev_pend_deref(conf->disks[i].replacement); in handle_stripe()
5359 rdev = rdev_pend_deref(conf->disks[i].rdev); in handle_stripe()
5360 rdev_clear_badblocks(rdev, sh->sector, in handle_stripe()
5361 RAID5_STRIPE_SECTORS(conf), 0); in handle_stripe()
5362 rdev_dec_pending(rdev, conf->mddev); in handle_stripe()
5376 atomic_dec(&conf->preread_active_stripes); in handle_stripe()
5377 if (atomic_read(&conf->preread_active_stripes) < in handle_stripe()
5379 md_wakeup_thread(conf->mddev->thread); in handle_stripe()
5382 clear_bit_unlock(STRIPE_ACTIVE, &sh->state); in handle_stripe()
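
/*
 * The STRIPE_ACTIVE bit taken with test_and_set_bit_lock() at the top of
 * handle_stripe() is a per-stripe try-lock: if another context already owns
 * the stripe, this one just re-flags STRIPE_HANDLE and backs off.  A sketch
 * of the same shape with C11 atomics; the struct and try_handle_stripe()
 * are illustrative, not the driver's.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct stripe {
	atomic_flag active;	/* ~STRIPE_ACTIVE */
	atomic_bool handle;	/* ~STRIPE_HANDLE: "look at me again" */
};

/* returns true if this call did the work, false if someone else owns it */
static bool try_handle_stripe(struct stripe *sh)
{
	if (atomic_flag_test_and_set_explicit(&sh->active,
					      memory_order_acquire)) {
		atomic_store(&sh->handle, true);  /* ask for another pass */
		return false;
	}
	/* ...the real work runs here, single-threaded per stripe... */
	atomic_flag_clear_explicit(&sh->active, memory_order_release);
	return true;
}

int main(void)
{
	struct stripe sh;

	atomic_flag_clear(&sh.active);
	atomic_init(&sh.handle, false);

	printf("uncontended: %d\n", try_handle_stripe(&sh));
	atomic_flag_test_and_set(&sh.active);	/* pretend another CPU owns it */
	printf("contended:   %d\n", try_handle_stripe(&sh));
	return 0;
}
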
5385 static void raid5_activate_delayed(struct r5conf *conf) in raid5_activate_delayed() argument
5386 __must_hold(&conf->device_lock) in raid5_activate_delayed()
5388 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { in raid5_activate_delayed()
5389 while (!list_empty(&conf->delayed_list)) { in raid5_activate_delayed()
5390 struct list_head *l = conf->delayed_list.next; in raid5_activate_delayed()
5394 clear_bit(STRIPE_DELAYED, &sh->state); in raid5_activate_delayed()
5395 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in raid5_activate_delayed()
5396 atomic_inc(&conf->preread_active_stripes); in raid5_activate_delayed()
5397 list_add_tail(&sh->lru, &conf->hold_list); in raid5_activate_delayed()
5403 static void activate_bit_delay(struct r5conf *conf, in activate_bit_delay() argument
5405 __must_hold(&conf->device_lock) in activate_bit_delay()
5408 list_add(&head, &conf->bitmap_list); in activate_bit_delay()
5409 list_del_init(&conf->bitmap_list); in activate_bit_delay()
5413 list_del_init(&sh->lru); in activate_bit_delay()
5414 atomic_inc(&sh->count); in activate_bit_delay()
5415 hash = sh->hash_lock_index; in activate_bit_delay()
5416 __release_stripe(conf, sh, &temp_inactive_list[hash]); in activate_bit_delay()
5422 struct r5conf *conf = mddev->private; in in_chunk_boundary() local
5423 sector_t sector = bio->bi_iter.bi_sector; in in_chunk_boundary()
5427 chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); in in_chunk_boundary()
5429 ((sector & (chunk_sectors - 1)) + bio_sectors); in in_chunk_boundary()
5436 static void add_bio_to_retry(struct bio *bi, struct r5conf *conf) in add_bio_to_retry() argument
5440 spin_lock_irqsave(&conf->device_lock, flags); in add_bio_to_retry()
5442 bi->bi_next = conf->retry_read_aligned_list; in add_bio_to_retry()
5443 conf->retry_read_aligned_list = bi; in add_bio_to_retry()
5445 spin_unlock_irqrestore(&conf->device_lock, flags); in add_bio_to_retry()
5446 md_wakeup_thread(conf->mddev->thread); in add_bio_to_retry()
5449 static struct bio *remove_bio_from_retry(struct r5conf *conf, in remove_bio_from_retry() argument
5454 bi = conf->retry_read_aligned; in remove_bio_from_retry()
5456 *offset = conf->retry_read_offset; in remove_bio_from_retry()
5457 conf->retry_read_aligned = NULL; in remove_bio_from_retry()
5460 bi = conf->retry_read_aligned_list; in remove_bio_from_retry()
5462 conf->retry_read_aligned_list = bi->bi_next; in remove_bio_from_retry()
5463 bi->bi_next = NULL; in remove_bio_from_retry()
5478 struct bio *raid_bi = bi->bi_private; in raid5_align_endio()
5479 struct md_rdev *rdev = (void *)raid_bi->bi_next; in raid5_align_endio()
5480 struct mddev *mddev = rdev->mddev; in raid5_align_endio()
5481 struct r5conf *conf = mddev->private; in raid5_align_endio() local
5482 blk_status_t error = bi->bi_status; in raid5_align_endio()
5485 raid_bi->bi_next = NULL; in raid5_align_endio()
5486 rdev_dec_pending(rdev, conf->mddev); in raid5_align_endio()
5490 if (atomic_dec_and_test(&conf->active_aligned_reads)) in raid5_align_endio()
5491 wake_up(&conf->wait_for_quiescent); in raid5_align_endio()
5497 add_bio_to_retry(raid_bi, conf); in raid5_align_endio()
5502 struct r5conf *conf = mddev->private; in raid5_read_one_chunk() local
5514 sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0, in raid5_read_one_chunk()
5519 if (r5c_big_stripe_cached(conf, sector)) in raid5_read_one_chunk()
5522 rdev = rcu_dereference(conf->disks[dd_idx].replacement); in raid5_read_one_chunk()
5523 if (!rdev || test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5524 rdev->recovery_offset < end_sector) { in raid5_read_one_chunk()
5525 rdev = rcu_dereference(conf->disks[dd_idx].rdev); in raid5_read_one_chunk()
5528 if (test_bit(Faulty, &rdev->flags) || in raid5_read_one_chunk()
5529 !(test_bit(In_sync, &rdev->flags) || in raid5_read_one_chunk()
5530 rdev->recovery_offset >= end_sector)) in raid5_read_one_chunk()
5534 atomic_inc(&rdev->nr_pending); in raid5_read_one_chunk()
5544 raid_bio->bi_next = (void *)rdev; in raid5_read_one_chunk()
5546 align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO, in raid5_read_one_chunk()
5547 &mddev->bio_set); in raid5_read_one_chunk()
5548 align_bio->bi_end_io = raid5_align_endio; in raid5_read_one_chunk()
5549 align_bio->bi_private = raid_bio; in raid5_read_one_chunk()
5550 align_bio->bi_iter.bi_sector = sector; in raid5_read_one_chunk()
5552 /* No reshape active, so we can trust rdev->data_offset */ in raid5_read_one_chunk()
5553 align_bio->bi_iter.bi_sector += rdev->data_offset; in raid5_read_one_chunk()
5556 if (conf->quiesce == 0) { in raid5_read_one_chunk()
5557 atomic_inc(&conf->active_aligned_reads); in raid5_read_one_chunk()
5561 if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) { in raid5_read_one_chunk()
5565 if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads)) in raid5_read_one_chunk()
5566 wake_up(&conf->wait_for_quiescent); in raid5_read_one_chunk()
5567 spin_lock_irq(&conf->device_lock); in raid5_read_one_chunk()
5568 wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, in raid5_read_one_chunk()
5569 conf->device_lock); in raid5_read_one_chunk()
5570 atomic_inc(&conf->active_aligned_reads); in raid5_read_one_chunk()
5571 spin_unlock_irq(&conf->device_lock); in raid5_read_one_chunk()
5574 if (mddev->gendisk) in raid5_read_one_chunk()
5575 trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), in raid5_read_one_chunk()
5576 raid_bio->bi_iter.bi_sector); in raid5_read_one_chunk()
5588 sector_t sector = raid_bio->bi_iter.bi_sector; in chunk_aligned_read()
5589 unsigned chunk_sects = mddev->chunk_sectors; in chunk_aligned_read()
5590 unsigned sectors = chunk_sects - (sector & (chunk_sects-1)); in chunk_aligned_read()
5593 struct r5conf *conf = mddev->private; in chunk_aligned_read() local
5594 split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split); in chunk_aligned_read()
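
/*
 * Both helpers above lean on chunk_sectors being a power of two:
 * in_chunk_boundary() checks containment with a mask, and
 * chunk_aligned_read() splits a straddling bio at the boundary using the
 * complementary expression.  The arithmetic, stand-alone (split_len() is an
 * illustrative name, not a driver helper):
 */
#include <stdbool.h>
#include <stdio.h>

/* does [sector, sector+len) stay inside one chunk? */
static bool in_chunk_boundary(unsigned long long sector, unsigned len,
			      unsigned chunk_sectors)
{
	return (sector & (chunk_sectors - 1)) + len <= chunk_sectors;
}

/* length of the first piece when a bio straddles a chunk boundary */
static unsigned split_len(unsigned long long sector, unsigned chunk_sectors)
{
	return chunk_sectors - (unsigned)(sector & (chunk_sectors - 1));
}

int main(void)
{
	unsigned chunk = 128;		/* 64KiB chunks with 512B sectors */
	unsigned long long sector = 120;
	unsigned len = 16;

	if (!in_chunk_boundary(sector, len, chunk))
		printf("split after %u sectors\n", split_len(sector, chunk));
	return 0;
}
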
5606 /* __get_priority_stripe - get the next stripe to process
5616 static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) in __get_priority_stripe() argument
5617 __must_hold(&conf->device_lock) in __get_priority_stripe()
5622 bool second_try = !r5c_is_writeback(conf->log) && in __get_priority_stripe()
5623 !r5l_log_disk_error(conf); in __get_priority_stripe()
5624 bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || in __get_priority_stripe()
5625 r5l_log_disk_error(conf); in __get_priority_stripe()
5630 if (conf->worker_cnt_per_group == 0) { in __get_priority_stripe()
5631 handle_list = try_loprio ? &conf->loprio_list : in __get_priority_stripe()
5632 &conf->handle_list; in __get_priority_stripe()
5634 handle_list = try_loprio ? &conf->worker_groups[group].loprio_list : in __get_priority_stripe()
5635 &conf->worker_groups[group].handle_list; in __get_priority_stripe()
5636 wg = &conf->worker_groups[group]; in __get_priority_stripe()
5639 for (i = 0; i < conf->group_cnt; i++) { in __get_priority_stripe()
5640 handle_list = try_loprio ? &conf->worker_groups[i].loprio_list : in __get_priority_stripe()
5641 &conf->worker_groups[i].handle_list; in __get_priority_stripe()
5642 wg = &conf->worker_groups[i]; in __get_priority_stripe()
5651 list_empty(&conf->hold_list) ? "empty" : "busy", in __get_priority_stripe()
5652 atomic_read(&conf->pending_full_writes), conf->bypass_count); in __get_priority_stripe()
5655 sh = list_entry(handle_list->next, typeof(*sh), lru); in __get_priority_stripe()
5657 if (list_empty(&conf->hold_list)) in __get_priority_stripe()
5658 conf->bypass_count = 0; in __get_priority_stripe()
5659 else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { in __get_priority_stripe()
5660 if (conf->hold_list.next == conf->last_hold) in __get_priority_stripe()
5661 conf->bypass_count++; in __get_priority_stripe()
5663 conf->last_hold = conf->hold_list.next; in __get_priority_stripe()
5664 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5665 if (conf->bypass_count < 0) in __get_priority_stripe()
5666 conf->bypass_count = 0; in __get_priority_stripe()
5669 } else if (!list_empty(&conf->hold_list) && in __get_priority_stripe()
5670 ((conf->bypass_threshold && in __get_priority_stripe()
5671 conf->bypass_count > conf->bypass_threshold) || in __get_priority_stripe()
5672 atomic_read(&conf->pending_full_writes) == 0)) { in __get_priority_stripe()
5674 list_for_each_entry(tmp, &conf->hold_list, lru) { in __get_priority_stripe()
5675 if (conf->worker_cnt_per_group == 0 || in __get_priority_stripe()
5677 !cpu_online(tmp->cpu) || in __get_priority_stripe()
5678 cpu_to_group(tmp->cpu) == group) { in __get_priority_stripe()
5685 conf->bypass_count -= conf->bypass_threshold; in __get_priority_stripe()
5686 if (conf->bypass_count < 0) in __get_priority_stripe()
5687 conf->bypass_count = 0; in __get_priority_stripe()
5701 wg->stripes_cnt--; in __get_priority_stripe()
5702 sh->group = NULL; in __get_priority_stripe()
5704 list_del_init(&sh->lru); in __get_priority_stripe()
5705 BUG_ON(atomic_inc_return(&sh->count) != 1); in __get_priority_stripe()
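
/*
 * The bypass_count bookkeeping above is a starvation valve: stripes on
 * hold_list are waiting in the hope of coalescing into full-stripe writes,
 * the count rises each time handle_list is serviced past them, and they are
 * only released once the count clears bypass_threshold (or no full-stripe
 * writes are pending).  A sketch of that policy with bare counters; struct
 * sched is illustrative, and the real code also tracks last_hold and
 * STRIPE_IO_STARTED before incrementing.
 */
#include <stdio.h>

struct sched {
	int handle_pending;	/* ~length of handle_list */
	int hold_pending;	/* ~length of hold_list */
	int full_writes;	/* ~pending_full_writes */
	int bypass_count;
	int bypass_threshold;
};

/* 'H': normal stripe, 'P': promoted hold-list stripe, '-': nothing ready */
static char pick(struct sched *s)
{
	if (s->handle_pending) {
		s->handle_pending--;
		if (!s->hold_pending)
			s->bypass_count = 0;	/* nobody is being starved */
		else
			s->bypass_count++;	/* hold_list bypassed again */
		return 'H';
	}
	if (s->hold_pending &&
	    (s->bypass_count > s->bypass_threshold || !s->full_writes)) {
		s->hold_pending--;
		s->bypass_count -= s->bypass_threshold;
		if (s->bypass_count < 0)
			s->bypass_count = 0;
		return 'P';
	}
	return '-';
}

int main(void)
{
	struct sched s = { .handle_pending = 5, .hold_pending = 2,
			   .full_writes = 1, .bypass_threshold = 3 };

	for (int i = 0; i < 9; i++)
		putchar(pick(&s));
	putchar('\n');		/* prints HHHHHP--- for these inputs */
	return 0;
}
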
5720 struct mddev *mddev = cb->cb.data; in raid5_unplug()
5721 struct r5conf *conf = mddev->private; in raid5_unplug() local
5725 if (cb->list.next && !list_empty(&cb->list)) { in raid5_unplug()
5726 spin_lock_irq(&conf->device_lock); in raid5_unplug()
5727 while (!list_empty(&cb->list)) { in raid5_unplug()
5728 sh = list_first_entry(&cb->list, struct stripe_head, lru); in raid5_unplug()
5729 list_del_init(&sh->lru); in raid5_unplug()
5736 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); in raid5_unplug()
5741 hash = sh->hash_lock_index; in raid5_unplug()
5742 __release_stripe(conf, sh, &cb->temp_inactive_list[hash]); in raid5_unplug()
5745 spin_unlock_irq(&conf->device_lock); in raid5_unplug()
5747 release_inactive_stripe_list(conf, cb->temp_inactive_list, in raid5_unplug()
5749 if (mddev->queue) in raid5_unplug()
5750 trace_block_unplug(mddev->queue, cnt, !from_schedule); in raid5_unplug()
5769 if (cb->list.next == NULL) { in release_stripe_plug()
5771 INIT_LIST_HEAD(&cb->list); in release_stripe_plug()
5773 INIT_LIST_HEAD(cb->temp_inactive_list + i); in release_stripe_plug()
5776 if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) in release_stripe_plug()
5777 list_add_tail(&sh->lru, &cb->list); in release_stripe_plug()
5784 struct r5conf *conf = mddev->private; in make_discard_request() local
5790 if (WARN_ON_ONCE(bi->bi_opf & REQ_NOWAIT)) in make_discard_request()
5793 if (mddev->reshape_position != MaxSector) in make_discard_request()
5797 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in make_discard_request()
5800 bi->bi_next = NULL; in make_discard_request()
5802 stripe_sectors = conf->chunk_sectors * in make_discard_request()
5803 (conf->raid_disks - conf->max_degraded); in make_discard_request()
5808 logical_sector *= conf->chunk_sectors; in make_discard_request()
5809 last_sector *= conf->chunk_sectors; in make_discard_request()
5812 logical_sector += RAID5_STRIPE_SECTORS(conf)) { in make_discard_request()
5816 sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0); in make_discard_request()
5817 prepare_to_wait(&conf->wait_for_overlap, &w, in make_discard_request()
5819 set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5820 if (test_bit(STRIPE_SYNCING, &sh->state)) { in make_discard_request()
5825 clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); in make_discard_request()
5826 spin_lock_irq(&sh->stripe_lock); in make_discard_request()
5827 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5828 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5830 if (sh->dev[d].towrite || sh->dev[d].toread) { in make_discard_request()
5831 set_bit(R5_Overlap, &sh->dev[d].flags); in make_discard_request()
5832 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5838 set_bit(STRIPE_DISCARD, &sh->state); in make_discard_request()
5839 finish_wait(&conf->wait_for_overlap, &w); in make_discard_request()
5840 sh->overwrite_disks = 0; in make_discard_request()
5841 for (d = 0; d < conf->raid_disks; d++) { in make_discard_request()
5842 if (d == sh->pd_idx || d == sh->qd_idx) in make_discard_request()
5844 sh->dev[d].towrite = bi; in make_discard_request()
5845 set_bit(R5_OVERWRITE, &sh->dev[d].flags); in make_discard_request()
5848 sh->overwrite_disks++; in make_discard_request()
5850 spin_unlock_irq(&sh->stripe_lock); in make_discard_request()
5851 if (conf->mddev->bitmap) { in make_discard_request()
5853 d < conf->raid_disks - conf->max_degraded; in make_discard_request()
5855 md_bitmap_startwrite(mddev->bitmap, in make_discard_request()
5856 sh->sector, in make_discard_request()
5857 RAID5_STRIPE_SECTORS(conf), in make_discard_request()
5859 sh->bm_seq = conf->seq_flush + 1; in make_discard_request()
5860 set_bit(STRIPE_BIT_DELAY, &sh->state); in make_discard_request()
5863 set_bit(STRIPE_HANDLE, &sh->state); in make_discard_request()
5864 clear_bit(STRIPE_DELAYED, &sh->state); in make_discard_request()
5865 if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in make_discard_request()
5866 atomic_inc(&conf->preread_active_stripes); in make_discard_request()
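
/*
 * The rounding above exists because a discard may only be dropped on whole
 * stripes: one stripe spans chunk_sectors on each of the
 * (raid_disks - max_degraded) data devices.  The alignment arithmetic on
 * its own, with made-up values:
 */
#include <stdio.h>

int main(void)
{
	unsigned chunk_sectors = 128;		/* per-device chunk */
	int raid_disks = 6, max_degraded = 2;	/* RAID-6 on six disks */
	unsigned long long stripe_sectors =
		chunk_sectors * (unsigned long long)(raid_disks - max_degraded);
	unsigned long long start = 1000, end = 9000;	/* requested range */

	/* round the start up and the end down to full-stripe boundaries */
	unsigned long long first = (start + stripe_sectors - 1) /
				   stripe_sectors * stripe_sectors;
	unsigned long long last = end / stripe_sectors * stripe_sectors;

	if (first < last)
		printf("discard [%llu, %llu) in %llu-sector stripes\n",
		       first, last, stripe_sectors);
	else
		printf("range too small to cover any full stripe\n");
	return 0;
}
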
5876 return mddev->reshape_backwards ? sector < reshape_sector : in ahead_of_reshape()
5883 return mddev->reshape_backwards ? max < reshape_sector : in range_ahead_of_reshape()
5887 static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf, in stripe_ahead_of_reshape() argument
5894 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in stripe_ahead_of_reshape()
5895 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in stripe_ahead_of_reshape()
5898 min_sector = min(min_sector, sh->dev[dd_idx].sector); in stripe_ahead_of_reshape()
5899 max_sector = max(max_sector, sh->dev[dd_idx].sector); in stripe_ahead_of_reshape()
5902 spin_lock_irq(&conf->device_lock); in stripe_ahead_of_reshape()
5905 conf->reshape_progress)) in stripe_ahead_of_reshape()
5909 spin_unlock_irq(&conf->device_lock); in stripe_ahead_of_reshape()
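
/*
 * "Ahead of the reshape" means the sector still lives in the OLD layout: a
 * forward reshape converts low sectors first and a backwards one high
 * sectors first, so the comparison flips with the direction, exactly as in
 * ahead_of_reshape() above.  Restated stand-alone (the driver's version
 * takes the mddev and reads reshape_backwards from it):
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

static bool ahead_of_reshape(bool backwards, sector_t sector,
			     sector_t reshape_progress)
{
	return backwards ? sector < reshape_progress
			 : sector > reshape_progress;
}

int main(void)
{
	sector_t progress = 4096;	/* how far the reshape has run */

	printf("fwd,  sector 1000 still old layout? %d\n",
	       ahead_of_reshape(false, 1000, progress));
	printf("fwd,  sector 8000 still old layout? %d\n",
	       ahead_of_reshape(false, 8000, progress));
	printf("back, sector 1000 still old layout? %d\n",
	       ahead_of_reshape(true, 1000, progress));
	return 0;
}
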
5914 static int add_all_stripe_bios(struct r5conf *conf, in add_all_stripe_bios() argument
5921 spin_lock_irq(&sh->stripe_lock); in add_all_stripe_bios()
5923 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in add_all_stripe_bios()
5924 struct r5dev *dev = &sh->dev[dd_idx]; in add_all_stripe_bios()
5926 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in add_all_stripe_bios()
5929 if (dev->sector < ctx->first_sector || in add_all_stripe_bios()
5930 dev->sector >= ctx->last_sector) in add_all_stripe_bios()
5934 set_bit(R5_Overlap, &dev->flags); in add_all_stripe_bios()
5943 for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { in add_all_stripe_bios()
5944 struct r5dev *dev = &sh->dev[dd_idx]; in add_all_stripe_bios()
5946 if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) in add_all_stripe_bios()
5949 if (dev->sector < ctx->first_sector || in add_all_stripe_bios()
5950 dev->sector >= ctx->last_sector) in add_all_stripe_bios()
5954 clear_bit((dev->sector - ctx->first_sector) >> in add_all_stripe_bios()
5955 RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do); in add_all_stripe_bios()
5959 spin_unlock_irq(&sh->stripe_lock); in add_all_stripe_bios()
5965 return test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && in reshape_inprogress()
5966 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && in reshape_inprogress()
5967 !test_bit(MD_RECOVERY_DONE, &mddev->recovery) && in reshape_inprogress()
5968 !test_bit(MD_RECOVERY_INTR, &mddev->recovery); in reshape_inprogress()
5977 struct r5conf *conf, struct stripe_request_ctx *ctx, in make_stripe_request() argument
5987 seq = read_seqcount_begin(&conf->gen_lock); in make_stripe_request()
5989 if (unlikely(conf->reshape_progress != MaxSector)) { in make_stripe_request()
5993 * possible to see a half-updated value in make_stripe_request()
5999 spin_lock_irq(&conf->device_lock); in make_stripe_request()
6001 conf->reshape_progress)) { in make_stripe_request()
6005 conf->reshape_safe)) { in make_stripe_request()
6006 spin_unlock_irq(&conf->device_lock); in make_stripe_request()
6011 spin_unlock_irq(&conf->device_lock); in make_stripe_request()
6014 new_sector = raid5_compute_sector(conf, logical_sector, previous, in make_stripe_request()
6021 if (bi->bi_opf & REQ_RAHEAD) in make_stripe_request()
6023 sh = raid5_get_active_stripe(conf, ctx, new_sector, flags); in make_stripe_request()
6025 /* cannot get stripe, just give up */ in make_stripe_request()
6026 bi->bi_status = BLK_STS_IOERR; in make_stripe_request()
6031 stripe_ahead_of_reshape(mddev, conf, sh)) { in make_stripe_request()
6044 if (read_seqcount_retry(&conf->gen_lock, seq)) { in make_stripe_request()
6050 if (test_bit(STRIPE_EXPANDING, &sh->state) || in make_stripe_request()
6051 !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) { in make_stripe_request()
6056 md_wakeup_thread(mddev->thread); in make_stripe_request()
6062 stripe_add_to_batch_list(conf, sh, ctx->batch_last); in make_stripe_request()
6063 if (ctx->batch_last) in make_stripe_request()
6064 raid5_release_stripe(ctx->batch_last); in make_stripe_request()
6065 atomic_inc(&sh->count); in make_stripe_request()
6066 ctx->batch_last = sh; in make_stripe_request()
6069 if (ctx->do_flush) { in make_stripe_request()
6070 set_bit(STRIPE_R5C_PREFLUSH, &sh->state); in make_stripe_request()
6072 ctx->do_flush = false; in make_stripe_request()
6075 set_bit(STRIPE_HANDLE, &sh->state); in make_stripe_request()
6076 clear_bit(STRIPE_DELAYED, &sh->state); in make_stripe_request()
6077 if ((!sh->batch_head || sh == sh->batch_head) && in make_stripe_request()
6078 (bi->bi_opf & REQ_SYNC) && in make_stripe_request()
6079 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) in make_stripe_request()
6080 atomic_inc(&conf->preread_active_stripes); in make_stripe_request()
6090 bi->bi_status = BLK_STS_IOERR; in make_stripe_request()
6103 static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf, in raid5_bio_lowest_chunk_sector() argument
6106 int sectors_per_chunk = conf->chunk_sectors; in raid5_bio_lowest_chunk_sector()
6107 int raid_disks = conf->raid_disks; in raid5_bio_lowest_chunk_sector()
6111 sector_t r_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in raid5_bio_lowest_chunk_sector()
6115 sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh); in raid5_bio_lowest_chunk_sector()
6117 if (sectors_per_chunk - chunk_offset >= bio_sectors(bi)) in raid5_bio_lowest_chunk_sector()
6128 return r_sector + sectors_per_chunk - chunk_offset; in raid5_bio_lowest_chunk_sector()
6134 struct r5conf *conf = mddev->private; in raid5_make_request() local
6141 if (unlikely(bi->bi_opf & REQ_PREFLUSH)) { in raid5_make_request()
6142 int ret = log_handle_flush_request(conf, bi); in raid5_make_request()
6146 if (ret == -ENODEV) { in raid5_make_request()
6150 /* ret == -EAGAIN, fallback */ in raid5_make_request()
6155 ctx.do_flush = bi->bi_opf & REQ_PREFLUSH; in raid5_make_request()
6165 if (rw == READ && mddev->degraded == 0 && in raid5_make_request()
6166 mddev->reshape_position == MaxSector) { in raid5_make_request()
6178 logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in raid5_make_request()
6181 bi->bi_next = NULL; in raid5_make_request()
6183 stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx.last_sector - logical_sector, in raid5_make_request()
6184 RAID5_STRIPE_SECTORS(conf)); in raid5_make_request()
6188 bi->bi_iter.bi_sector, ctx.last_sector); in raid5_make_request()
6191 if ((bi->bi_opf & REQ_NOWAIT) && in raid5_make_request()
6192 (conf->reshape_progress != MaxSector) && in raid5_make_request()
6193 !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) && in raid5_make_request()
6194 ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) { in raid5_make_request()
6209 if (likely(conf->reshape_progress == MaxSector)) in raid5_make_request()
6210 logical_sector = raid5_bio_lowest_chunk_sector(conf, bi); in raid5_make_request()
6211 s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf); in raid5_make_request()
6213 add_wait_queue(&conf->wait_for_overlap, &wait); in raid5_make_request()
6215 res = make_stripe_request(mddev, conf, &ctx, logical_sector, in raid5_make_request()
6246 (s << RAID5_STRIPE_SHIFT(conf)); in raid5_make_request()
6248 remove_wait_queue(&conf->wait_for_overlap, &wait); in raid5_make_request()
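
/*
 * ctx->sectors_to_do above is a bitmap with one bit per stripe touched by
 * the bio; make_stripe_request() clears a stripe's bit once its bios are
 * attached, and the loop jumps to the next set bit.  A one-word model of
 * that bookkeeping (the driver uses a DECLARE_BITMAP sized for 256 stripes
 * and find_first_bit(); __builtin_ctzll is a GCC/Clang builtin, and
 * STRIPE_SHIFT here is a toy value):
 */
#include <stdint.h>
#include <stdio.h>

#define STRIPE_SHIFT 3			/* 8-sector stripes in this sketch */

int main(void)
{
	uint64_t first = 16, last = 80;	/* bio covers sectors [16, 80) */
	unsigned stripe_cnt = (unsigned)
		((last - first + (1u << STRIPE_SHIFT) - 1) >> STRIPE_SHIFT);
	uint64_t todo = (stripe_cnt >= 64) ? ~0ULL : (1ULL << stripe_cnt) - 1;

	while (todo) {
		unsigned s = (unsigned)__builtin_ctzll(todo);	/* next set bit */
		uint64_t sector = first + ((uint64_t)s << STRIPE_SHIFT);

		printf("issue stripe %u at sector %llu\n",
		       s, (unsigned long long)sector);
		todo &= ~(1ULL << s);	/* ~clear_bit(s, ctx->sectors_to_do) */
	}
	return 0;
}
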
6272 struct r5conf *conf = mddev->private; in reshape_request() local
6276 int raid_disks = conf->previous_raid_disks; in reshape_request()
6277 int data_disks = raid_disks - conf->max_degraded; in reshape_request()
6278 int new_data_disks = conf->raid_disks - conf->max_degraded; in reshape_request()
6289 if (mddev->reshape_backwards && in reshape_request()
6290 conf->reshape_progress < raid5_size(mddev, 0, 0)) { in reshape_request()
6292 - conf->reshape_progress; in reshape_request()
6293 } else if (mddev->reshape_backwards && in reshape_request()
6294 conf->reshape_progress == MaxSector) { in reshape_request()
6297 } else if (!mddev->reshape_backwards && in reshape_request()
6298 conf->reshape_progress > 0) in reshape_request()
6299 sector_nr = conf->reshape_progress; in reshape_request()
6302 mddev->curr_resync_completed = sector_nr; in reshape_request()
6303 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
6315 reshape_sectors = max(conf->chunk_sectors, conf->prev_chunk_sectors); in reshape_request()
6318 * the data about to be copied would over-write the source of in reshape_request()
6323 writepos = conf->reshape_progress; in reshape_request()
6325 readpos = conf->reshape_progress; in reshape_request()
6327 safepos = conf->reshape_safe; in reshape_request()
6329 if (mddev->reshape_backwards) { in reshape_request()
6331 writepos -= reshape_sectors; in reshape_request()
6336 /* readpos and safepos are worst-case calculations. in reshape_request()
6340 readpos -= min_t(sector_t, reshape_sectors, readpos); in reshape_request()
6341 safepos -= min_t(sector_t, reshape_sectors, safepos); in reshape_request()
6347 if (mddev->reshape_backwards) { in reshape_request()
6348 BUG_ON(conf->reshape_progress == 0); in reshape_request()
6350 BUG_ON((mddev->dev_sectors & in reshape_request()
6351 ~((sector_t)reshape_sectors - 1)) in reshape_request()
6352 - reshape_sectors - stripe_addr in reshape_request()
6367 * ensure safety in the face of a crash - that must be done by userspace in reshape_request()
6379 if (conf->min_offset_diff < 0) { in reshape_request()
6380 safepos += -conf->min_offset_diff; in reshape_request()
6381 readpos += -conf->min_offset_diff; in reshape_request()
6383 writepos += conf->min_offset_diff; in reshape_request()
6385 if ((mddev->reshape_backwards in reshape_request()
6388 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { in reshape_request()
6390 wait_event(conf->wait_for_overlap, in reshape_request()
6391 atomic_read(&conf->reshape_stripes)==0 in reshape_request()
6392 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6393 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6395 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6396 mddev->curr_resync_completed = sector_nr; in reshape_request()
6397 if (!mddev->reshape_backwards) in reshape_request()
6400 if (rdev->raid_disk >= 0 && in reshape_request()
6401 !test_bit(Journal, &rdev->flags) && in reshape_request()
6402 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6403 rdev->recovery_offset < sector_nr) in reshape_request()
6404 rdev->recovery_offset = sector_nr; in reshape_request()
6406 conf->reshape_checkpoint = jiffies; in reshape_request()
6407 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6408 md_wakeup_thread(mddev->thread); in reshape_request()
6409 wait_event(mddev->sb_wait, mddev->sb_flags == 0 || in reshape_request()
6410 test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6411 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6413 spin_lock_irq(&conf->device_lock); in reshape_request()
6414 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6415 spin_unlock_irq(&conf->device_lock); in reshape_request()
6416 wake_up(&conf->wait_for_overlap); in reshape_request()
6417 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
6421 for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) { in reshape_request()
6424 sh = raid5_get_active_stripe(conf, NULL, stripe_addr+i, in reshape_request()
6426 set_bit(STRIPE_EXPANDING, &sh->state); in reshape_request()
6427 atomic_inc(&conf->reshape_stripes); in reshape_request()
6431 for (j=sh->disks; j--;) { in reshape_request()
6433 if (j == sh->pd_idx) in reshape_request()
6435 if (conf->level == 6 && in reshape_request()
6436 j == sh->qd_idx) in reshape_request()
6443 memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf)); in reshape_request()
6444 set_bit(R5_Expanded, &sh->dev[j].flags); in reshape_request()
6445 set_bit(R5_UPTODATE, &sh->dev[j].flags); in reshape_request()
6448 set_bit(STRIPE_EXPAND_READY, &sh->state); in reshape_request()
6449 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6451 list_add(&sh->lru, &stripes); in reshape_request()
6453 spin_lock_irq(&conf->device_lock); in reshape_request()
6454 if (mddev->reshape_backwards) in reshape_request()
6455 conf->reshape_progress -= reshape_sectors * new_data_disks; in reshape_request()
6457 conf->reshape_progress += reshape_sectors * new_data_disks; in reshape_request()
6458 spin_unlock_irq(&conf->device_lock); in reshape_request()
6465 raid5_compute_sector(conf, stripe_addr*(new_data_disks), in reshape_request()
6468 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) in reshape_request()
6469 * new_data_disks - 1), in reshape_request()
6471 if (last_sector >= mddev->dev_sectors) in reshape_request()
6472 last_sector = mddev->dev_sectors - 1; in reshape_request()
6474 sh = raid5_get_active_stripe(conf, NULL, first_sector, in reshape_request()
6476 set_bit(STRIPE_EXPAND_SOURCE, &sh->state); in reshape_request()
6477 set_bit(STRIPE_HANDLE, &sh->state); in reshape_request()
6479 first_sector += RAID5_STRIPE_SECTORS(conf); in reshape_request()
6486 list_del_init(&sh->lru); in reshape_request()
6495 if (mddev->curr_resync_completed > mddev->resync_max || in reshape_request()
6496 (sector_nr - mddev->curr_resync_completed) * 2 in reshape_request()
6497 >= mddev->resync_max - mddev->curr_resync_completed) { in reshape_request()
6499 wait_event(conf->wait_for_overlap, in reshape_request()
6500 atomic_read(&conf->reshape_stripes) == 0 in reshape_request()
6501 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6502 if (atomic_read(&conf->reshape_stripes) != 0) in reshape_request()
6504 mddev->reshape_position = conf->reshape_progress; in reshape_request()
6505 mddev->curr_resync_completed = sector_nr; in reshape_request()
6506 if (!mddev->reshape_backwards) in reshape_request()
6509 if (rdev->raid_disk >= 0 && in reshape_request()
6510 !test_bit(Journal, &rdev->flags) && in reshape_request()
6511 !test_bit(In_sync, &rdev->flags) && in reshape_request()
6512 rdev->recovery_offset < sector_nr) in reshape_request()
6513 rdev->recovery_offset = sector_nr; in reshape_request()
6514 conf->reshape_checkpoint = jiffies; in reshape_request()
6515 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in reshape_request()
6516 md_wakeup_thread(mddev->thread); in reshape_request()
6517 wait_event(mddev->sb_wait, in reshape_request()
6518 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) in reshape_request()
6519 || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); in reshape_request()
6520 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) in reshape_request()
6522 spin_lock_irq(&conf->device_lock); in reshape_request()
6523 conf->reshape_safe = mddev->reshape_position; in reshape_request()
6524 spin_unlock_irq(&conf->device_lock); in reshape_request()
6525 wake_up(&conf->wait_for_overlap); in reshape_request()
6526 sysfs_notify_dirent_safe(mddev->sysfs_completed); in reshape_request()
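
/*
 * The writepos/readpos/safepos juggling above guards against a crash while
 * the copy window is in flight: the metadata checkpoint (reshape_safe) must
 * record a position the copy has verifiably passed before blocks whose only
 * good copy sits inside the window get overwritten.  The trigger, reduced
 * to its comparison (the driver additionally checkpoints on a 10-second
 * timer and after min_offset_diff adjustment):
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

static bool need_checkpoint(bool backwards, sector_t readpos,
			    sector_t writepos, sector_t safepos)
{
	return backwards ? (safepos > writepos && readpos < writepos)
			 : (safepos < writepos && readpos > writepos);
}

int main(void)
{
	/* forward reshape to more disks: the per-device read position runs
	 * ahead of the write front */
	printf("stale safepos: %d\n", need_checkpoint(false, 2048, 1024, 512));
	printf("fresh safepos: %d\n", need_checkpoint(false, 2048, 1024, 1536));
	return 0;
}
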
6535 struct r5conf *conf = mddev->private; in raid5_sync_request() local
6537 sector_t max_sector = mddev->dev_sectors; in raid5_sync_request()
6545 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { in raid5_sync_request()
6546 end_reshape(conf); in raid5_sync_request()
6550 if (mddev->curr_resync < max_sector) /* aborted */ in raid5_sync_request()
6551 md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync, in raid5_sync_request()
6554 conf->fullsync = 0; in raid5_sync_request()
6555 md_bitmap_close_sync(mddev->bitmap); in raid5_sync_request()
6561 wait_event(conf->wait_for_overlap, conf->quiesce != 2); in raid5_sync_request()
6563 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) in raid5_sync_request()
6576 if (mddev->degraded >= conf->max_degraded && in raid5_sync_request()
6577 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { in raid5_sync_request()
6578 sector_t rv = mddev->dev_sectors - sector_nr; in raid5_sync_request()
6582 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && in raid5_sync_request()
6583 !conf->fullsync && in raid5_sync_request()
6584 !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && in raid5_sync_request()
6585 sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { in raid5_sync_request()
6587 do_div(sync_blocks, RAID5_STRIPE_SECTORS(conf)); in raid5_sync_request()
6590 return sync_blocks * RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
6593 md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); in raid5_sync_request()
6595 sh = raid5_get_active_stripe(conf, NULL, sector_nr, in raid5_sync_request()
6598 sh = raid5_get_active_stripe(conf, NULL, sector_nr, 0); in raid5_sync_request()
6609 for (i = 0; i < conf->raid_disks; i++) { in raid5_sync_request()
6610 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_sync_request()
6612 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) in raid5_sync_request()
6617 md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); in raid5_sync_request()
6619 set_bit(STRIPE_SYNC_REQUESTED, &sh->state); in raid5_sync_request()
6620 set_bit(STRIPE_HANDLE, &sh->state); in raid5_sync_request()
6624 return RAID5_STRIPE_SECTORS(conf); in raid5_sync_request()
6627 static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, in retry_aligned_read() argument
6632 * We cannot pre-allocate enough stripe_heads as we may need more than in retry_aligned_read()
6635 * exist in the cache, so we go one at a time, recording in ->bi_hw_segments how many are done. in retry_aligned_read()
6646 logical_sector = raid_bio->bi_iter.bi_sector & in retry_aligned_read()
6647 ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); in retry_aligned_read()
6648 sector = raid5_compute_sector(conf, logical_sector, in retry_aligned_read()
6653 logical_sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6654 sector += RAID5_STRIPE_SECTORS(conf), in retry_aligned_read()
6661 sh = raid5_get_active_stripe(conf, NULL, sector, in retry_aligned_read()
6664 /* failed to get a stripe - must wait */ in retry_aligned_read()
6665 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6666 conf->retry_read_offset = scnt; in retry_aligned_read()
6672 conf->retry_read_aligned = raid_bio; in retry_aligned_read()
6673 conf->retry_read_offset = scnt; in retry_aligned_read()
6677 set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); in retry_aligned_read()
6685 if (atomic_dec_and_test(&conf->active_aligned_reads)) in retry_aligned_read()
6686 wake_up(&conf->wait_for_quiescent); in retry_aligned_read()
6690 static int handle_active_stripes(struct r5conf *conf, int group, in handle_active_stripes() argument
6693 __must_hold(&conf->device_lock) in handle_active_stripes()
6700 (sh = __get_priority_stripe(conf, group)) != NULL) in handle_active_stripes()
6708 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6709 log_flush_stripe_to_raid(conf); in handle_active_stripes()
6710 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6715 spin_unlock_irq(&conf->device_lock); in handle_active_stripes()
6717 release_inactive_stripe_list(conf, temp_inactive_list, in handle_active_stripes()
6720 r5l_flush_stripe_to_raid(conf->log); in handle_active_stripes()
6722 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6728 log_write_stripe_run(conf); in handle_active_stripes()
6732 spin_lock_irq(&conf->device_lock); in handle_active_stripes()
6734 hash = batch[i]->hash_lock_index; in handle_active_stripes()
6735 __release_stripe(conf, batch[i], &temp_inactive_list[hash]); in handle_active_stripes()
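
/*
 * handle_active_stripes() shows the batching discipline used throughout
 * this file: pull up to MAX_STRIPE_BATCH stripes off the list while holding
 * device_lock, drop the lock for the expensive per-stripe work, then retake
 * it to put them back.  The shape of that pattern with pthreads; all names
 * below are illustrative.
 */
#include <pthread.h>
#include <stdio.h>

#define MAX_BATCH 8			/* ~MAX_STRIPE_BATCH */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int pending = 20;		/* ~stripes queued for handling */

static int get_one_locked(void)		/* ~__get_priority_stripe() */
{
	if (pending <= 0)
		return 0;
	pending--;
	return 1;
}

static int handle_batch(void)
{
	int n = 0;

	pthread_mutex_lock(&lock);
	while (n < MAX_BATCH && get_one_locked())
		n++;
	pthread_mutex_unlock(&lock);

	for (int i = 0; i < n; i++)
		;	/* the per-stripe work runs without the list lock */

	pthread_mutex_lock(&lock);
	/* ~__release_stripe(): each stripe goes back on an inactive list */
	pthread_mutex_unlock(&lock);
	return n;
}

int main(void)
{
	int n, total = 0;

	while ((n = handle_batch()) > 0)
		total += n;
	printf("handled %d stripes, at most %d per lock hold\n",
	       total, MAX_BATCH);
	return 0;
}
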
6743 struct r5worker_group *group = worker->group; in raid5_do_work()
6744 struct r5conf *conf = group->conf; in raid5_do_work() local
6745 struct mddev *mddev = conf->mddev; in raid5_do_work()
6746 int group_id = group - conf->worker_groups; in raid5_do_work()
6754 spin_lock_irq(&conf->device_lock); in raid5_do_work()
6758 released = release_stripe_list(conf, worker->temp_inactive_list); in raid5_do_work()
6760 batch_size = handle_active_stripes(conf, group_id, worker, in raid5_do_work()
6761 worker->temp_inactive_list); in raid5_do_work()
6762 worker->working = false; in raid5_do_work()
6766 wait_event_lock_irq(mddev->sb_wait, in raid5_do_work()
6767 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), in raid5_do_work()
6768 conf->device_lock); in raid5_do_work()
6772 spin_unlock_irq(&conf->device_lock); in raid5_do_work()
6774 flush_deferred_bios(conf); in raid5_do_work()
6776 r5l_flush_stripe_to_raid(conf->log); in raid5_do_work()
6781 pr_debug("--- raid5worker inactive\n"); in raid5_do_work()
6793 struct mddev *mddev = thread->mddev; in raid5d()
6794 struct r5conf *conf = mddev->private; in raid5d() local
6804 spin_lock_irq(&conf->device_lock); in raid5d()
6810 released = release_stripe_list(conf, conf->temp_inactive_list); in raid5d()
6812 clear_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6815 !list_empty(&conf->bitmap_list)) { in raid5d()
6817 conf->seq_flush++; in raid5d()
6818 spin_unlock_irq(&conf->device_lock); in raid5d()
6819 md_bitmap_unplug(mddev->bitmap); in raid5d()
6820 spin_lock_irq(&conf->device_lock); in raid5d()
6821 conf->seq_write = conf->seq_flush; in raid5d()
6822 activate_bit_delay(conf, conf->temp_inactive_list); in raid5d()
6824 raid5_activate_delayed(conf); in raid5d()
6826 while ((bio = remove_bio_from_retry(conf, &offset))) { in raid5d()
6828 spin_unlock_irq(&conf->device_lock); in raid5d()
6829 ok = retry_aligned_read(conf, bio, offset); in raid5d()
6830 spin_lock_irq(&conf->device_lock); in raid5d()
6836 batch_size = handle_active_stripes(conf, ANY_GROUP, NULL, in raid5d()
6837 conf->temp_inactive_list); in raid5d()
6842 if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) { in raid5d()
6843 spin_unlock_irq(&conf->device_lock); in raid5d()
6845 spin_lock_irq(&conf->device_lock); in raid5d()
6855 wait_event_lock_irq(mddev->sb_wait, in raid5d()
6856 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags), in raid5d()
6857 conf->device_lock); in raid5d()
6861 spin_unlock_irq(&conf->device_lock); in raid5d()
6862 if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && in raid5d()
6863 mutex_trylock(&conf->cache_size_mutex)) { in raid5d()
6864 grow_one_stripe(conf, __GFP_NOWARN); in raid5d()
6868 set_bit(R5_DID_ALLOC, &conf->cache_state); in raid5d()
6869 mutex_unlock(&conf->cache_size_mutex); in raid5d()
6872 flush_deferred_bios(conf); in raid5d()
6874 r5l_flush_stripe_to_raid(conf->log); in raid5d()
6879 pr_debug("--- raid5d inactive\n"); in raid5d()
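/*
 * Added note (not in the original source): the raid5d() loop above is
 * the array's main service thread. Under device_lock it drains the
 * lock-free released_stripes list, flushes the write-intent bitmap and
 * re-activates bit-delayed stripes, retries queued aligned reads, and
 * processes stripes in batches via handle_active_stripes(); with the
 * lock dropped it opportunistically grows the stripe cache whenever
 * R5_ALLOC_MORE is set and cache_size_mutex can be taken.
 */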
6885 struct r5conf *conf; in raid5_show_stripe_cache_size() local
6887 spin_lock(&mddev->lock); in raid5_show_stripe_cache_size()
6888 conf = mddev->private; in raid5_show_stripe_cache_size()
6889 if (conf) in raid5_show_stripe_cache_size()
6890 ret = sprintf(page, "%d\n", conf->min_nr_stripes); in raid5_show_stripe_cache_size()
6891 spin_unlock(&mddev->lock); in raid5_show_stripe_cache_size()
6899 struct r5conf *conf = mddev->private; in raid5_set_cache_size() local
6902 return -EINVAL; in raid5_set_cache_size()
6904 conf->min_nr_stripes = size; in raid5_set_cache_size()
6905 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6906 while (size < conf->max_nr_stripes && in raid5_set_cache_size()
6907 drop_one_stripe(conf)) in raid5_set_cache_size()
6909 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
6913 mutex_lock(&conf->cache_size_mutex); in raid5_set_cache_size()
6914 while (size > conf->max_nr_stripes) in raid5_set_cache_size()
6915 if (!grow_one_stripe(conf, GFP_KERNEL)) { in raid5_set_cache_size()
6916 conf->min_nr_stripes = conf->max_nr_stripes; in raid5_set_cache_size()
6917 result = -ENOMEM; in raid5_set_cache_size()
6920 mutex_unlock(&conf->cache_size_mutex); in raid5_set_cache_size()
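/*
 * Illustrative sketch (not from raid5.c): raid5_set_cache_size() above
 * resizes in two phases - shrink while over the target, then grow
 * while under it. Here 'cache' is just a counter standing in for the
 * stripe pool, and the helpers are made-up stand-ins for
 * drop_one_stripe()/grow_one_stripe().
 */
#include <stdio.h>
#include <stdbool.h>

static int cache = 256;				/* current pool size */

static bool drop_one(void) { if (!cache) return false; cache--; return true; }
static bool grow_one(void) { cache++; return true; }

static int set_cache_size(int size)
{
	while (size < cache && drop_one())
		;
	while (size > cache)
		if (!grow_one())
			return -1;		/* -ENOMEM in the real code */
	return 0;
}

int main(void)
{
	set_cache_size(512);
	printf("cache now %d stripes\n", cache);	/* 512 */
	return 0;
}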
6929 struct r5conf *conf; in raid5_store_stripe_cache_size() local
6934 return -EINVAL; in raid5_store_stripe_cache_size()
6936 return -EINVAL; in raid5_store_stripe_cache_size()
6940 conf = mddev->private; in raid5_store_stripe_cache_size()
6941 if (!conf) in raid5_store_stripe_cache_size()
6942 err = -ENODEV; in raid5_store_stripe_cache_size()
6958 struct r5conf *conf = mddev->private; in raid5_show_rmw_level() local
6959 if (conf) in raid5_show_rmw_level()
6960 return sprintf(page, "%d\n", conf->rmw_level); in raid5_show_rmw_level()
6968 struct r5conf *conf = mddev->private; in raid5_store_rmw_level() local
6971 if (!conf) in raid5_store_rmw_level()
6972 return -ENODEV; in raid5_store_rmw_level()
6975 return -EINVAL; in raid5_store_rmw_level()
6978 return -EINVAL; in raid5_store_rmw_level()
6981 return -EINVAL; in raid5_store_rmw_level()
6986 return -EINVAL; in raid5_store_rmw_level()
6988 conf->rmw_level = new; in raid5_store_rmw_level()
7000 struct r5conf *conf; in raid5_show_stripe_size() local
7003 spin_lock(&mddev->lock); in raid5_show_stripe_size()
7004 conf = mddev->private; in raid5_show_stripe_size()
7005 if (conf) in raid5_show_stripe_size()
7006 ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf)); in raid5_show_stripe_size()
7007 spin_unlock(&mddev->lock); in raid5_show_stripe_size()
7015 struct r5conf *conf; in raid5_store_stripe_size() local
7021 return -EINVAL; in raid5_store_stripe_size()
7023 return -EINVAL; in raid5_store_stripe_size()
7033 return -EINVAL; in raid5_store_stripe_size()
7039 conf = mddev->private; in raid5_store_stripe_size()
7040 if (!conf) { in raid5_store_stripe_size()
7041 err = -ENODEV; in raid5_store_stripe_size()
7045 if (new == conf->stripe_size) in raid5_store_stripe_size()
7049 conf->stripe_size, new); in raid5_store_stripe_size()
7051 if (mddev->sync_thread || in raid5_store_stripe_size()
7052 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || in raid5_store_stripe_size()
7053 mddev->reshape_position != MaxSector || in raid5_store_stripe_size()
7054 mddev->sysfs_active) { in raid5_store_stripe_size()
7055 err = -EBUSY; in raid5_store_stripe_size()
7060 mutex_lock(&conf->cache_size_mutex); in raid5_store_stripe_size()
7061 size = conf->max_nr_stripes; in raid5_store_stripe_size()
7063 shrink_stripes(conf); in raid5_store_stripe_size()
7065 conf->stripe_size = new; in raid5_store_stripe_size()
7066 conf->stripe_shift = ilog2(new) - 9; in raid5_store_stripe_size()
7067 conf->stripe_sectors = new >> 9; in raid5_store_stripe_size()
7068 if (grow_stripes(conf, size)) { in raid5_store_stripe_size()
7071 err = -ENOMEM; in raid5_store_stripe_size()
7073 mutex_unlock(&conf->cache_size_mutex); in raid5_store_stripe_size()
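/*
 * Illustrative sketch (not from raid5.c): the three fields kept in
 * sync above when stripe_size changes. ilog2() is open-coded so the
 * example is standalone; 4096 is an example value.
 */
#include <stdio.h>

static int ilog2_u32(unsigned int v)
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

int main(void)
{
	unsigned int stripe_size = 4096;		/* bytes */
	int stripe_shift = ilog2_u32(stripe_size) - 9;	/* in 512B sectors */
	unsigned int stripe_sectors = stripe_size >> 9;

	printf("size=%u shift=%d sectors=%u\n",
	       stripe_size, stripe_shift, stripe_sectors);
	/* prints: size=4096 shift=3 sectors=8 */
	return 0;
}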
7095 struct r5conf *conf; in raid5_show_preread_threshold() local
7097 spin_lock(&mddev->lock); in raid5_show_preread_threshold()
7098 conf = mddev->private; in raid5_show_preread_threshold()
7099 if (conf) in raid5_show_preread_threshold()
7100 ret = sprintf(page, "%d\n", conf->bypass_threshold); in raid5_show_preread_threshold()
7101 spin_unlock(&mddev->lock); in raid5_show_preread_threshold()
7108 struct r5conf *conf; in raid5_store_preread_threshold() local
7113 return -EINVAL; in raid5_store_preread_threshold()
7115 return -EINVAL; in raid5_store_preread_threshold()
7120 conf = mddev->private; in raid5_store_preread_threshold()
7121 if (!conf) in raid5_store_preread_threshold()
7122 err = -ENODEV; in raid5_store_preread_threshold()
7123 else if (new > conf->min_nr_stripes) in raid5_store_preread_threshold()
7124 err = -EINVAL; in raid5_store_preread_threshold()
7126 conf->bypass_threshold = new; in raid5_store_preread_threshold()
7140 struct r5conf *conf; in raid5_show_skip_copy() local
7142 spin_lock(&mddev->lock); in raid5_show_skip_copy()
7143 conf = mddev->private; in raid5_show_skip_copy()
7144 if (conf) in raid5_show_skip_copy()
7145 ret = sprintf(page, "%d\n", conf->skip_copy); in raid5_show_skip_copy()
7146 spin_unlock(&mddev->lock); in raid5_show_skip_copy()
7153 struct r5conf *conf; in raid5_store_skip_copy() local
7158 return -EINVAL; in raid5_store_skip_copy()
7160 return -EINVAL; in raid5_store_skip_copy()
7166 conf = mddev->private; in raid5_store_skip_copy()
7167 if (!conf) in raid5_store_skip_copy()
7168 err = -ENODEV; in raid5_store_skip_copy()
7169 else if (new != conf->skip_copy) { in raid5_store_skip_copy()
7170 struct request_queue *q = mddev->queue; in raid5_store_skip_copy()
7173 conf->skip_copy = new; in raid5_store_skip_copy()
7192 struct r5conf *conf = mddev->private; in stripe_cache_active_show() local
7193 if (conf) in stripe_cache_active_show()
7194 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); in stripe_cache_active_show()
7205 struct r5conf *conf; in raid5_show_group_thread_cnt() local
7207 spin_lock(&mddev->lock); in raid5_show_group_thread_cnt()
7208 conf = mddev->private; in raid5_show_group_thread_cnt()
7209 if (conf) in raid5_show_group_thread_cnt()
7210 ret = sprintf(page, "%d\n", conf->worker_cnt_per_group); in raid5_show_group_thread_cnt()
7211 spin_unlock(&mddev->lock); in raid5_show_group_thread_cnt()
7215 static int alloc_thread_groups(struct r5conf *conf, int cnt,
7221 struct r5conf *conf; in raid5_store_group_thread_cnt() local
7228 return -EINVAL; in raid5_store_group_thread_cnt()
7230 return -EINVAL; in raid5_store_group_thread_cnt()
7233 return -EINVAL; in raid5_store_group_thread_cnt()
7238 conf = mddev->private; in raid5_store_group_thread_cnt()
7239 if (!conf) in raid5_store_group_thread_cnt()
7240 err = -ENODEV; in raid5_store_group_thread_cnt()
7241 else if (new != conf->worker_cnt_per_group) { in raid5_store_group_thread_cnt()
7244 old_groups = conf->worker_groups; in raid5_store_group_thread_cnt()
7248 err = alloc_thread_groups(conf, new, &group_cnt, &new_groups); in raid5_store_group_thread_cnt()
7250 spin_lock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
7251 conf->group_cnt = group_cnt; in raid5_store_group_thread_cnt()
7252 conf->worker_cnt_per_group = new; in raid5_store_group_thread_cnt()
7253 conf->worker_groups = new_groups; in raid5_store_group_thread_cnt()
7254 spin_unlock_irq(&conf->device_lock); in raid5_store_group_thread_cnt()
7289 static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt, in alloc_thread_groups() argument
7309 return -ENOMEM; in alloc_thread_groups()
7316 INIT_LIST_HEAD(&group->handle_list); in alloc_thread_groups()
7317 INIT_LIST_HEAD(&group->loprio_list); in alloc_thread_groups()
7318 group->conf = conf; in alloc_thread_groups()
7319 group->workers = workers + i * cnt; in alloc_thread_groups()
7322 struct r5worker *worker = group->workers + j; in alloc_thread_groups()
7323 worker->group = group; in alloc_thread_groups()
7324 INIT_WORK(&worker->work, raid5_do_work); in alloc_thread_groups()
7327 INIT_LIST_HEAD(worker->temp_inactive_list + k); in alloc_thread_groups()
7334 static void free_thread_groups(struct r5conf *conf) in free_thread_groups() argument
7336 if (conf->worker_groups) in free_thread_groups()
7337 kfree(conf->worker_groups[0].workers); in free_thread_groups()
7338 kfree(conf->worker_groups); in free_thread_groups()
7339 conf->worker_groups = NULL; in free_thread_groups()
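/*
 * Illustrative sketch (not from raid5.c): the allocation layout used by
 * alloc_thread_groups()/free_thread_groups() above - all workers live
 * in one flat array carved into per-group slices, so a single kfree()
 * of worker_groups[0].workers releases every worker. The struct types
 * here are simplified stand-ins.
 */
#include <stdlib.h>

struct worker { int id; };
struct group  { struct worker *workers; };

int main(void)
{
	int group_cnt = 2, cnt = 4;		/* workers per group */
	struct group  *groups  = calloc(group_cnt, sizeof(*groups));
	struct worker *workers = calloc((size_t)group_cnt * cnt,
					sizeof(*workers));

	if (!groups || !workers)
		return 1;

	for (int i = 0; i < group_cnt; i++)
		groups[i].workers = workers + i * cnt;	/* per-group slice */

	free(groups[0].workers);	/* frees every group's workers */
	free(groups);
	return 0;
}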
7345 struct r5conf *conf = mddev->private; in raid5_size() local
7348 sectors = mddev->dev_sectors; in raid5_size()
7351 raid_disks = min(conf->raid_disks, conf->previous_raid_disks); in raid5_size()
7353 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_size()
7354 sectors &= ~((sector_t)conf->prev_chunk_sectors - 1); in raid5_size()
7355 return sectors * (raid_disks - conf->max_degraded); in raid5_size()
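/*
 * Illustrative sketch (not from raid5.c): the usable-capacity math in
 * raid5_size() above as a standalone program. The geometry values are
 * made-up examples.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dev_sectors = 1953525168ULL;	/* smallest member, 512B units */
	uint64_t chunk_sectors = 1024;		/* 512 KiB chunk */
	int raid_disks = 4, max_degraded = 1;	/* RAID5 */

	/* round each member down to a whole number of chunks */
	dev_sectors &= ~(chunk_sectors - 1);

	/* parity consumes max_degraded disks' worth of capacity */
	uint64_t array_sectors = dev_sectors * (raid_disks - max_degraded);

	printf("array size: %llu sectors (%llu GiB)\n",
	       (unsigned long long)array_sectors,
	       (unsigned long long)(array_sectors >> 21));
	return 0;
}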
7358 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in free_scratch_buffer() argument
7360 safe_put_page(percpu->spare_page); in free_scratch_buffer()
7361 percpu->spare_page = NULL; in free_scratch_buffer()
7362 kvfree(percpu->scribble); in free_scratch_buffer()
7363 percpu->scribble = NULL; in free_scratch_buffer()
7366 static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu) in alloc_scratch_buffer() argument
7368 if (conf->level == 6 && !percpu->spare_page) { in alloc_scratch_buffer()
7369 percpu->spare_page = alloc_page(GFP_KERNEL); in alloc_scratch_buffer()
7370 if (!percpu->spare_page) in alloc_scratch_buffer()
7371 return -ENOMEM; in alloc_scratch_buffer()
7375 max(conf->raid_disks, in alloc_scratch_buffer()
7376 conf->previous_raid_disks), in alloc_scratch_buffer()
7377 max(conf->chunk_sectors, in alloc_scratch_buffer()
7378 conf->prev_chunk_sectors) in alloc_scratch_buffer()
7379 / RAID5_STRIPE_SECTORS(conf))) { in alloc_scratch_buffer()
7380 free_scratch_buffer(conf, percpu); in alloc_scratch_buffer()
7381 return -ENOMEM; in alloc_scratch_buffer()
7384 local_lock_init(&percpu->lock); in alloc_scratch_buffer()
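/*
 * Added note (not in the original source): the scratch buffers above
 * are sized with max() of the old and new disk counts and the old and
 * new chunk sizes, i.e. for the worst case across a reshape, so the
 * per-CPU scribble space never needs reallocating while a reshape is
 * in flight. For RAID6 a spare page is kept as well.
 */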
7390 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_dead() local
7392 free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); in raid456_cpu_dead()
7396 static void raid5_free_percpu(struct r5conf *conf) in raid5_free_percpu() argument
7398 if (!conf->percpu) in raid5_free_percpu()
7401 cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_free_percpu()
7402 free_percpu(conf->percpu); in raid5_free_percpu()
7405 static void free_conf(struct r5conf *conf) in free_conf() argument
7409 log_exit(conf); in free_conf()
7411 unregister_shrinker(&conf->shrinker); in free_conf()
7412 free_thread_groups(conf); in free_conf()
7413 shrink_stripes(conf); in free_conf()
7414 raid5_free_percpu(conf); in free_conf()
7415 for (i = 0; i < conf->pool_size; i++) in free_conf()
7416 if (conf->disks[i].extra_page) in free_conf()
7417 put_page(conf->disks[i].extra_page); in free_conf()
7418 kfree(conf->disks); in free_conf()
7419 bioset_exit(&conf->bio_split); in free_conf()
7420 kfree(conf->stripe_hashtbl); in free_conf()
7421 kfree(conf->pending_data); in free_conf()
7422 kfree(conf); in free_conf()
7427 struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); in raid456_cpu_up_prepare() local
7428 struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); in raid456_cpu_up_prepare()
7430 if (alloc_scratch_buffer(conf, percpu)) { in raid456_cpu_up_prepare()
7433 return -ENOMEM; in raid456_cpu_up_prepare()
7438 static int raid5_alloc_percpu(struct r5conf *conf) in raid5_alloc_percpu() argument
7442 conf->percpu = alloc_percpu(struct raid5_percpu); in raid5_alloc_percpu()
7443 if (!conf->percpu) in raid5_alloc_percpu()
7444 return -ENOMEM; in raid5_alloc_percpu()
7446 err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); in raid5_alloc_percpu()
7448 conf->scribble_disks = max(conf->raid_disks, in raid5_alloc_percpu()
7449 conf->previous_raid_disks); in raid5_alloc_percpu()
7450 conf->scribble_sectors = max(conf->chunk_sectors, in raid5_alloc_percpu()
7451 conf->prev_chunk_sectors); in raid5_alloc_percpu()
7459 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_scan() local
7462 if (mutex_trylock(&conf->cache_size_mutex)) { in raid5_cache_scan()
7464 while (ret < sc->nr_to_scan && in raid5_cache_scan()
7465 conf->max_nr_stripes > conf->min_nr_stripes) { in raid5_cache_scan()
7466 if (drop_one_stripe(conf) == 0) { in raid5_cache_scan()
7472 mutex_unlock(&conf->cache_size_mutex); in raid5_cache_scan()
7480 struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); in raid5_cache_count() local
7482 if (conf->max_nr_stripes < conf->min_nr_stripes) in raid5_cache_count()
7485 return conf->max_nr_stripes - conf->min_nr_stripes; in raid5_cache_count()
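/*
 * Added note (not in the original source): the two callbacks above
 * implement the shrinker contract for the stripe cache.
 * raid5_cache_count() reports only the stripes above the configured
 * floor (max_nr_stripes - min_nr_stripes) as reclaimable, and
 * raid5_cache_scan() drops them one at a time under mutex_trylock()
 * so memory reclaim never blocks waiting for cache_size_mutex.
 */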
7490 struct r5conf *conf; in setup_conf() local
7498 int ret = -ENOMEM; in setup_conf()
7500 if (mddev->new_level != 5 in setup_conf()
7501 && mddev->new_level != 4 in setup_conf()
7502 && mddev->new_level != 6) { in setup_conf()
7504 mdname(mddev), mddev->new_level); in setup_conf()
7505 return ERR_PTR(-EIO); in setup_conf()
7507 if ((mddev->new_level == 5 in setup_conf()
7508 && !algorithm_valid_raid5(mddev->new_layout)) || in setup_conf()
7509 (mddev->new_level == 6 in setup_conf()
7510 && !algorithm_valid_raid6(mddev->new_layout))) { in setup_conf()
7512 mdname(mddev), mddev->new_layout); in setup_conf()
7513 return ERR_PTR(-EIO); in setup_conf()
7515 if (mddev->new_level == 6 && mddev->raid_disks < 4) { in setup_conf()
7517 mdname(mddev), mddev->raid_disks); in setup_conf()
7518 return ERR_PTR(-EINVAL); in setup_conf()
7521 if (!mddev->new_chunk_sectors || in setup_conf()
7522 (mddev->new_chunk_sectors << 9) % PAGE_SIZE || in setup_conf()
7523 !is_power_of_2(mddev->new_chunk_sectors)) { in setup_conf()
7525 mdname(mddev), mddev->new_chunk_sectors << 9); in setup_conf()
7526 return ERR_PTR(-EINVAL); in setup_conf()
7529 conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); in setup_conf()
7530 if (conf == NULL) in setup_conf()
7534 conf->stripe_size = DEFAULT_STRIPE_SIZE; in setup_conf()
7535 conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9; in setup_conf()
7536 conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9; in setup_conf()
7538 INIT_LIST_HEAD(&conf->free_list); in setup_conf()
7539 INIT_LIST_HEAD(&conf->pending_list); in setup_conf()
7540 conf->pending_data = kcalloc(PENDING_IO_MAX, in setup_conf()
7543 if (!conf->pending_data) in setup_conf()
7546 list_add(&conf->pending_data[i].sibling, &conf->free_list); in setup_conf()
7547 /* Don't enable multi-threading by default */ in setup_conf()
7548 if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) { in setup_conf()
7549 conf->group_cnt = group_cnt; in setup_conf()
7550 conf->worker_cnt_per_group = 0; in setup_conf()
7551 conf->worker_groups = new_group; in setup_conf()
7554 spin_lock_init(&conf->device_lock); in setup_conf()
7555 seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock); in setup_conf()
7556 mutex_init(&conf->cache_size_mutex); in setup_conf()
7558 init_waitqueue_head(&conf->wait_for_quiescent); in setup_conf()
7559 init_waitqueue_head(&conf->wait_for_stripe); in setup_conf()
7560 init_waitqueue_head(&conf->wait_for_overlap); in setup_conf()
7561 INIT_LIST_HEAD(&conf->handle_list); in setup_conf()
7562 INIT_LIST_HEAD(&conf->loprio_list); in setup_conf()
7563 INIT_LIST_HEAD(&conf->hold_list); in setup_conf()
7564 INIT_LIST_HEAD(&conf->delayed_list); in setup_conf()
7565 INIT_LIST_HEAD(&conf->bitmap_list); in setup_conf()
7566 init_llist_head(&conf->released_stripes); in setup_conf()
7567 atomic_set(&conf->active_stripes, 0); in setup_conf()
7568 atomic_set(&conf->preread_active_stripes, 0); in setup_conf()
7569 atomic_set(&conf->active_aligned_reads, 0); in setup_conf()
7570 spin_lock_init(&conf->pending_bios_lock); in setup_conf()
7571 conf->batch_bio_dispatch = true; in setup_conf()
7573 if (test_bit(Journal, &rdev->flags)) in setup_conf()
7575 if (bdev_nonrot(rdev->bdev)) { in setup_conf()
7576 conf->batch_bio_dispatch = false; in setup_conf()
7581 conf->bypass_threshold = BYPASS_THRESHOLD; in setup_conf()
7582 conf->recovery_disabled = mddev->recovery_disabled - 1; in setup_conf()
7584 conf->raid_disks = mddev->raid_disks; in setup_conf()
7585 if (mddev->reshape_position == MaxSector) in setup_conf()
7586 conf->previous_raid_disks = mddev->raid_disks; in setup_conf()
7588 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; in setup_conf()
7589 max_disks = max(conf->raid_disks, conf->previous_raid_disks); in setup_conf()
7591 conf->disks = kcalloc(max_disks, sizeof(struct disk_info), in setup_conf()
7594 if (!conf->disks) in setup_conf()
7598 conf->disks[i].extra_page = alloc_page(GFP_KERNEL); in setup_conf()
7599 if (!conf->disks[i].extra_page) in setup_conf()
7603 ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0); in setup_conf()
7606 conf->mddev = mddev; in setup_conf()
7608 ret = -ENOMEM; in setup_conf()
7609 conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL); in setup_conf()
7610 if (!conf->stripe_hashtbl) in setup_conf()
7618 spin_lock_init(conf->hash_locks); in setup_conf()
7620 spin_lock_init(conf->hash_locks + i); in setup_conf()
7623 INIT_LIST_HEAD(conf->inactive_list + i); in setup_conf()
7626 INIT_LIST_HEAD(conf->temp_inactive_list + i); in setup_conf()
7628 atomic_set(&conf->r5c_cached_full_stripes, 0); in setup_conf()
7629 INIT_LIST_HEAD(&conf->r5c_full_stripe_list); in setup_conf()
7630 atomic_set(&conf->r5c_cached_partial_stripes, 0); in setup_conf()
7631 INIT_LIST_HEAD(&conf->r5c_partial_stripe_list); in setup_conf()
7632 atomic_set(&conf->r5c_flushing_full_stripes, 0); in setup_conf()
7633 atomic_set(&conf->r5c_flushing_partial_stripes, 0); in setup_conf()
7635 conf->level = mddev->new_level; in setup_conf()
7636 conf->chunk_sectors = mddev->new_chunk_sectors; in setup_conf()
7637 ret = raid5_alloc_percpu(conf); in setup_conf()
7643 ret = -EIO; in setup_conf()
7645 raid_disk = rdev->raid_disk; in setup_conf()
7647 || raid_disk < 0 || test_bit(Journal, &rdev->flags)) in setup_conf()
7649 disk = conf->disks + raid_disk; in setup_conf()
7651 if (test_bit(Replacement, &rdev->flags)) { in setup_conf()
7652 if (disk->replacement) in setup_conf()
7654 RCU_INIT_POINTER(disk->replacement, rdev); in setup_conf()
7656 if (disk->rdev) in setup_conf()
7658 RCU_INIT_POINTER(disk->rdev, rdev); in setup_conf()
7661 if (test_bit(In_sync, &rdev->flags)) { in setup_conf()
7663 mdname(mddev), rdev->bdev, raid_disk); in setup_conf()
7664 } else if (rdev->saved_raid_disk != raid_disk) in setup_conf()
7666 conf->fullsync = 1; in setup_conf()
7669 conf->level = mddev->new_level; in setup_conf()
7670 if (conf->level == 6) { in setup_conf()
7671 conf->max_degraded = 2; in setup_conf()
7673 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7675 conf->rmw_level = PARITY_DISABLE_RMW; in setup_conf()
7677 conf->max_degraded = 1; in setup_conf()
7678 conf->rmw_level = PARITY_ENABLE_RMW; in setup_conf()
7680 conf->algorithm = mddev->new_layout; in setup_conf()
7681 conf->reshape_progress = mddev->reshape_position; in setup_conf()
7682 if (conf->reshape_progress != MaxSector) { in setup_conf()
7683 conf->prev_chunk_sectors = mddev->chunk_sectors; in setup_conf()
7684 conf->prev_algo = mddev->layout; in setup_conf()
7686 conf->prev_chunk_sectors = conf->chunk_sectors; in setup_conf()
7687 conf->prev_algo = conf->algorithm; in setup_conf()
7690 conf->min_nr_stripes = NR_STRIPES; in setup_conf()
7691 if (mddev->reshape_position != MaxSector) { in setup_conf()
7693 ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4, in setup_conf()
7694 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4); in setup_conf()
7695 conf->min_nr_stripes = max(NR_STRIPES, stripes); in setup_conf()
7696 if (conf->min_nr_stripes != NR_STRIPES) in setup_conf()
7698 mdname(mddev), conf->min_nr_stripes); in setup_conf()
7700 memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + in setup_conf()
7702 atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); in setup_conf()
7703 if (grow_stripes(conf, conf->min_nr_stripes)) { in setup_conf()
7706 ret = -ENOMEM; in setup_conf()
7715 conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4; in setup_conf()
7716 conf->shrinker.scan_objects = raid5_cache_scan; in setup_conf()
7717 conf->shrinker.count_objects = raid5_cache_count; in setup_conf()
7718 conf->shrinker.batch = 128; in setup_conf()
7719 conf->shrinker.flags = 0; in setup_conf()
7720 ret = register_shrinker(&conf->shrinker, "md-raid5:%s", mdname(mddev)); in setup_conf()
7727 sprintf(pers_name, "raid%d", mddev->new_level); in setup_conf()
7728 rcu_assign_pointer(conf->thread, in setup_conf()
7730 if (!conf->thread) { in setup_conf()
7733 ret = -ENOMEM; in setup_conf()
7737 return conf; in setup_conf()
7740 if (conf) in setup_conf()
7741 free_conf(conf); in setup_conf()
7753 if (raid_disk >= raid_disks - max_degraded) in only_parity()
7758 raid_disk == raid_disks - 1) in only_parity()
7765 if (raid_disk == raid_disks - 1) in only_parity()
7771 static void raid5_set_io_opt(struct r5conf *conf) in raid5_set_io_opt() argument
7773 blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) * in raid5_set_io_opt()
7774 (conf->raid_disks - conf->max_degraded)); in raid5_set_io_opt()
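/*
 * Illustrative sketch (not from raid5.c): the optimal-I/O-size math in
 * raid5_set_io_opt() above - one full stripe of data is chunk bytes
 * times the number of data disks. Example geometry is assumed.
 */
#include <stdio.h>

int main(void)
{
	unsigned int chunk_sectors = 1024;	/* 512 KiB chunk */
	int raid_disks = 6, max_degraded = 2;	/* RAID6 */

	unsigned int io_opt = (chunk_sectors << 9) *
			      (raid_disks - max_degraded);

	printf("io_opt = %u bytes (%u KiB)\n", io_opt, io_opt >> 10);
	/* prints: io_opt = 2097152 bytes (2048 KiB) */
	return 0;
}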
7779 struct r5conf *conf; in raid5_run() local
7789 return -ENOMEM; in raid5_run()
7791 if (mddev->recovery_cp != MaxSector) in raid5_run()
7792 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", in raid5_run()
7798 if (test_bit(Journal, &rdev->flags)) { in raid5_run()
7802 if (rdev->raid_disk < 0) in raid5_run()
7804 diff = (rdev->new_data_offset - rdev->data_offset); in raid5_run()
7808 } else if (mddev->reshape_backwards && in raid5_run()
7811 else if (!mddev->reshape_backwards && in raid5_run()
7816 if ((test_bit(MD_HAS_JOURNAL, &mddev->flags) || journal_dev) && in raid5_run()
7817 (mddev->bitmap_info.offset || mddev->bitmap_info.file)) { in raid5_run()
7820 return -EINVAL; in raid5_run()
7823 if (mddev->reshape_position != MaxSector) { in raid5_run()
7833 * critical areas read-only and backed up. It will start in raid5_run()
7834 * the array in read-only mode, so we check for that. in raid5_run()
7838 int max_degraded = (mddev->level == 6 ? 2 : 1); in raid5_run()
7843 pr_warn("md/raid:%s: don't support reshape with journal - aborting.\n", in raid5_run()
7845 return -EINVAL; in raid5_run()
7848 if (mddev->new_level != mddev->level) { in raid5_run()
7849 pr_warn("md/raid:%s: unsupported reshape required - aborting.\n", in raid5_run()
7851 return -EINVAL; in raid5_run()
7853 old_disks = mddev->raid_disks - mddev->delta_disks; in raid5_run()
7854 /* reshape_position must be on a new-stripe boundary, and one in raid5_run()
7861 here_new = mddev->reshape_position; in raid5_run()
7862 chunk_sectors = max(mddev->chunk_sectors, mddev->new_chunk_sectors); in raid5_run()
7863 new_data_disks = mddev->raid_disks - max_degraded; in raid5_run()
7867 return -EINVAL; in raid5_run()
7871 here_old = mddev->reshape_position; in raid5_run()
7872 sector_div(here_old, chunk_sectors * (old_disks-max_degraded)); in raid5_run()
7875 if (mddev->delta_disks == 0) { in raid5_run()
7876 /* We cannot be sure it is safe to start an in-place in raid5_run()
7877 * reshape. It is only safe if user-space is monitoring in raid5_run()
7883 if (abs(min_offset_diff) >= mddev->chunk_sectors && in raid5_run()
7884 abs(min_offset_diff) >= mddev->new_chunk_sectors) in raid5_run()
7885 /* not really in-place - so OK */; in raid5_run()
7886 else if (mddev->ro == 0) { in raid5_run()
7887 pr_warn("md/raid:%s: in-place reshape must be started in read-only mode - aborting\n", in raid5_run()
7889 return -EINVAL; in raid5_run()
7891 } else if (mddev->reshape_backwards in raid5_run()
7895 here_old * chunk_sectors + (-min_offset_diff))) { in raid5_run()
7896 /* Reading from the same stripe as writing to - bad */ in raid5_run()
7897 pr_warn("md/raid:%s: reshape_position too early for auto-recovery - aborting.\n", in raid5_run()
7899 return -EINVAL; in raid5_run()
7904 BUG_ON(mddev->level != mddev->new_level); in raid5_run()
7905 BUG_ON(mddev->layout != mddev->new_layout); in raid5_run()
7906 BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); in raid5_run()
7907 BUG_ON(mddev->delta_disks != 0); in raid5_run()
7910 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && in raid5_run()
7911 test_bit(MD_HAS_PPL, &mddev->flags)) { in raid5_run()
7912 pr_warn("md/raid:%s: using journal device and PPL not allowed - disabling PPL\n", in raid5_run()
7914 clear_bit(MD_HAS_PPL, &mddev->flags); in raid5_run()
7915 clear_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags); in raid5_run()
7918 if (mddev->private == NULL) in raid5_run()
7919 conf = setup_conf(mddev); in raid5_run()
7921 conf = mddev->private; in raid5_run()
7923 if (IS_ERR(conf)) in raid5_run()
7924 return PTR_ERR(conf); in raid5_run()
7926 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { in raid5_run()
7930 mddev->ro = 1; in raid5_run()
7931 set_disk_ro(mddev->gendisk, 1); in raid5_run()
7932 } else if (mddev->recovery_cp == MaxSector) in raid5_run()
7933 set_bit(MD_JOURNAL_CLEAN, &mddev->flags); in raid5_run()
7936 conf->min_offset_diff = min_offset_diff; in raid5_run()
7937 rcu_assign_pointer(mddev->thread, conf->thread); in raid5_run()
7938 rcu_assign_pointer(conf->thread, NULL); in raid5_run()
7939 mddev->private = conf; in raid5_run()
7941 for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; in raid5_run()
7943 rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); in raid5_run()
7944 if (!rdev && conf->disks[i].replacement) { in raid5_run()
7947 conf->disks[i].replacement); in raid5_run()
7948 conf->disks[i].replacement = NULL; in raid5_run()
7949 clear_bit(Replacement, &rdev->flags); in raid5_run()
7950 rcu_assign_pointer(conf->disks[i].rdev, rdev); in raid5_run()
7954 if (rcu_access_pointer(conf->disks[i].replacement) && in raid5_run()
7955 conf->reshape_progress != MaxSector) { in raid5_run()
7960 if (test_bit(In_sync, &rdev->flags)) in raid5_run()
7962 /* This disk is not fully in-sync. However if it in raid5_run()
7971 if (mddev->major_version == 0 && in raid5_run()
7972 mddev->minor_version > 90) in raid5_run()
7973 rdev->recovery_offset = reshape_offset; in raid5_run()
7975 if (rdev->recovery_offset < reshape_offset) { in raid5_run()
7977 if (!only_parity(rdev->raid_disk, in raid5_run()
7978 conf->algorithm, in raid5_run()
7979 conf->raid_disks, in raid5_run()
7980 conf->max_degraded)) in raid5_run()
7983 if (!only_parity(rdev->raid_disk, in raid5_run()
7984 conf->prev_algo, in raid5_run()
7985 conf->previous_raid_disks, in raid5_run()
7986 conf->max_degraded)) in raid5_run()
7994 mddev->degraded = raid5_calc_degraded(conf); in raid5_run()
7996 if (has_failed(conf)) { in raid5_run()
7998 mdname(mddev), mddev->degraded, conf->raid_disks); in raid5_run()
8003 mddev->dev_sectors &= ~((sector_t)mddev->chunk_sectors - 1); in raid5_run()
8004 mddev->resync_max_sectors = mddev->dev_sectors; in raid5_run()
8006 if (mddev->degraded > dirty_parity_disks && in raid5_run()
8007 mddev->recovery_cp != MaxSector) { in raid5_run()
8008 if (test_bit(MD_HAS_PPL, &mddev->flags)) in raid5_run()
8011 else if (mddev->ok_start_degraded) in raid5_run()
8012 pr_crit("md/raid:%s: starting dirty degraded array - data corruption possible.\n", in raid5_run()
8022 mdname(mddev), conf->level, in raid5_run()
8023 mddev->raid_disks-mddev->degraded, mddev->raid_disks, in raid5_run()
8024 mddev->new_layout); in raid5_run()
8026 print_raid5_conf(conf); in raid5_run()
8028 if (conf->reshape_progress != MaxSector) { in raid5_run()
8029 conf->reshape_safe = conf->reshape_progress; in raid5_run()
8030 atomic_set(&conf->reshape_stripes, 0); in raid5_run()
8031 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_run()
8032 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_run()
8033 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_run()
8034 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_run()
8035 rcu_assign_pointer(mddev->sync_thread, in raid5_run()
8037 if (!mddev->sync_thread) in raid5_run()
8042 if (mddev->to_remove == &raid5_attrs_group) in raid5_run()
8043 mddev->to_remove = NULL; in raid5_run()
8044 else if (mddev->kobj.sd && in raid5_run()
8045 sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) in raid5_run()
8050 if (mddev->queue) { in raid5_run()
8052 /* read-ahead size must cover two whole stripes, which in raid5_run()
8056 int data_disks = conf->previous_raid_disks - conf->max_degraded; in raid5_run()
8058 ((mddev->chunk_sectors << 9) / PAGE_SIZE); in raid5_run()
8060 chunk_size = mddev->chunk_sectors << 9; in raid5_run()
8061 blk_queue_io_min(mddev->queue, chunk_size); in raid5_run()
8062 raid5_set_io_opt(conf); in raid5_run()
8063 mddev->queue->limits.raid_partial_stripes_expensive = 1; in raid5_run()
8070 mddev->queue->limits.discard_granularity = stripe; in raid5_run()
8072 blk_queue_max_write_zeroes_sectors(mddev->queue, 0); in raid5_run()
8075 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
8076 rdev->data_offset << 9); in raid5_run()
8077 disk_stack_limits(mddev->gendisk, rdev->bdev, in raid5_run()
8078 rdev->new_data_offset << 9); in raid5_run()
8097 mddev->queue->limits.max_discard_sectors < (stripe >> 9) || in raid5_run()
8098 mddev->queue->limits.discard_granularity < stripe) in raid5_run()
8099 blk_queue_max_discard_sectors(mddev->queue, 0); in raid5_run()
8105 blk_queue_max_hw_sectors(mddev->queue, in raid5_run()
8106 RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf)); in raid5_run()
8109 blk_queue_max_segments(mddev->queue, USHRT_MAX); in raid5_run()
8112 if (log_init(conf, journal_dev, raid5_has_ppl(conf))) in raid5_run()
8117 md_unregister_thread(mddev, &mddev->thread); in raid5_run()
8118 print_raid5_conf(conf); in raid5_run()
8119 free_conf(conf); in raid5_run()
8120 mddev->private = NULL; in raid5_run()
8122 return -EIO; in raid5_run()
8127 struct r5conf *conf = priv; in raid5_free() local
8129 free_conf(conf); in raid5_free()
8130 mddev->to_remove = &raid5_attrs_group; in raid5_free()
8135 struct r5conf *conf = mddev->private; in raid5_status() local
8138 seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, in raid5_status()
8139 conf->chunk_sectors / 2, mddev->layout); in raid5_status()
8140 seq_printf(seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); in raid5_status()
8142 for (i = 0; i < conf->raid_disks; i++) { in raid5_status()
8143 struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); in raid5_status()
8144 seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); in raid5_status()
8150 static void print_raid5_conf (struct r5conf *conf) in print_raid5_conf() argument
8155 pr_debug("RAID conf printout:\n"); in print_raid5_conf()
8156 if (!conf) { in print_raid5_conf()
8157 pr_debug("(conf==NULL)\n"); in print_raid5_conf()
8160 pr_debug(" --- level:%d rd:%d wd:%d\n", conf->level, in print_raid5_conf()
8161 conf->raid_disks, in print_raid5_conf()
8162 conf->raid_disks - conf->mddev->degraded); in print_raid5_conf()
8165 for (i = 0; i < conf->raid_disks; i++) { in print_raid5_conf()
8166 rdev = rcu_dereference(conf->disks[i].rdev); in print_raid5_conf()
8169 i, !test_bit(Faulty, &rdev->flags), in print_raid5_conf()
8170 rdev->bdev); in print_raid5_conf()
8178 struct r5conf *conf = mddev->private; in raid5_spare_active() local
8183 for (i = 0; i < conf->raid_disks; i++) { in raid5_spare_active()
8184 rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev); in raid5_spare_active()
8186 conf->disks[i].replacement); in raid5_spare_active()
8188 && replacement->recovery_offset == MaxSector in raid5_spare_active()
8189 && !test_bit(Faulty, &replacement->flags) in raid5_spare_active()
8190 && !test_and_set_bit(In_sync, &replacement->flags)) { in raid5_spare_active()
8193 || !test_and_clear_bit(In_sync, &rdev->flags)) in raid5_spare_active()
8198 * and never re-added. in raid5_spare_active()
8200 set_bit(Faulty, &rdev->flags); in raid5_spare_active()
8202 rdev->sysfs_state); in raid5_spare_active()
8204 sysfs_notify_dirent_safe(replacement->sysfs_state); in raid5_spare_active()
8206 && rdev->recovery_offset == MaxSector in raid5_spare_active()
8207 && !test_bit(Faulty, &rdev->flags) in raid5_spare_active()
8208 && !test_and_set_bit(In_sync, &rdev->flags)) { in raid5_spare_active()
8210 sysfs_notify_dirent_safe(rdev->sysfs_state); in raid5_spare_active()
8213 spin_lock_irqsave(&conf->device_lock, flags); in raid5_spare_active()
8214 mddev->degraded = raid5_calc_degraded(conf); in raid5_spare_active()
8215 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_spare_active()
8216 print_raid5_conf(conf); in raid5_spare_active()
8222 struct r5conf *conf = mddev->private; in raid5_remove_disk() local
8224 int number = rdev->raid_disk; in raid5_remove_disk()
8229 print_raid5_conf(conf); in raid5_remove_disk()
8230 if (test_bit(Journal, &rdev->flags) && conf->log) { in raid5_remove_disk()
8237 if (atomic_read(&conf->active_stripes) || in raid5_remove_disk()
8238 atomic_read(&conf->r5c_cached_full_stripes) || in raid5_remove_disk()
8239 atomic_read(&conf->r5c_cached_partial_stripes)) { in raid5_remove_disk()
8240 return -EBUSY; in raid5_remove_disk()
8242 log_exit(conf); in raid5_remove_disk()
8245 if (unlikely(number >= conf->pool_size)) in raid5_remove_disk()
8247 p = conf->disks + number; in raid5_remove_disk()
8248 if (rdev == rcu_access_pointer(p->rdev)) in raid5_remove_disk()
8249 rdevp = &p->rdev; in raid5_remove_disk()
8250 else if (rdev == rcu_access_pointer(p->replacement)) in raid5_remove_disk()
8251 rdevp = &p->replacement; in raid5_remove_disk()
8255 if (number >= conf->raid_disks && in raid5_remove_disk()
8256 conf->reshape_progress == MaxSector) in raid5_remove_disk()
8257 clear_bit(In_sync, &rdev->flags); in raid5_remove_disk()
8259 if (test_bit(In_sync, &rdev->flags) || in raid5_remove_disk()
8260 atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
8261 err = -EBUSY; in raid5_remove_disk()
8264 /* Only remove non-faulty devices if recovery in raid5_remove_disk()
8267 if (!test_bit(Faulty, &rdev->flags) && in raid5_remove_disk()
8268 mddev->recovery_disabled != conf->recovery_disabled && in raid5_remove_disk()
8269 !has_failed(conf) && in raid5_remove_disk()
8270 (!rcu_access_pointer(p->replacement) || in raid5_remove_disk()
8271 rcu_access_pointer(p->replacement) == rdev) && in raid5_remove_disk()
8272 number < conf->raid_disks) { in raid5_remove_disk()
8273 err = -EBUSY; in raid5_remove_disk()
8277 if (!test_bit(RemoveSynchronized, &rdev->flags)) { in raid5_remove_disk()
8278 lockdep_assert_held(&mddev->reconfig_mutex); in raid5_remove_disk()
8280 if (atomic_read(&rdev->nr_pending)) { in raid5_remove_disk()
8282 err = -EBUSY; in raid5_remove_disk()
8287 err = log_modify(conf, rdev, false); in raid5_remove_disk()
8292 tmp = rcu_access_pointer(p->replacement); in raid5_remove_disk()
8295 rcu_assign_pointer(p->rdev, tmp); in raid5_remove_disk()
8296 clear_bit(Replacement, &tmp->flags); in raid5_remove_disk()
8298 * but will never see neither of them - if they are careful in raid5_remove_disk()
8300 rcu_assign_pointer(p->replacement, NULL); in raid5_remove_disk()
8303 err = log_modify(conf, tmp, true); in raid5_remove_disk()
8306 clear_bit(WantReplacement, &rdev->flags); in raid5_remove_disk()
8309 print_raid5_conf(conf); in raid5_remove_disk()
8315 struct r5conf *conf = mddev->private; in raid5_add_disk() local
8316 int ret, err = -EEXIST; in raid5_add_disk()
8321 int last = conf->raid_disks - 1; in raid5_add_disk()
8323 if (test_bit(Journal, &rdev->flags)) { in raid5_add_disk()
8324 if (conf->log) in raid5_add_disk()
8325 return -EBUSY; in raid5_add_disk()
8327 rdev->raid_disk = 0; in raid5_add_disk()
8332 ret = log_init(conf, rdev, false); in raid5_add_disk()
8336 ret = r5l_start(conf->log); in raid5_add_disk()
8342 if (mddev->recovery_disabled == conf->recovery_disabled) in raid5_add_disk()
8343 return -EBUSY; in raid5_add_disk()
8345 if (rdev->saved_raid_disk < 0 && has_failed(conf)) in raid5_add_disk()
8347 return -EINVAL; in raid5_add_disk()
8349 if (rdev->raid_disk >= 0) in raid5_add_disk()
8350 first = last = rdev->raid_disk; in raid5_add_disk()
8353 * find the disk ... but prefer rdev->saved_raid_disk in raid5_add_disk()
8356 if (rdev->saved_raid_disk >= first && in raid5_add_disk()
8357 rdev->saved_raid_disk <= last && in raid5_add_disk()
8358 conf->disks[rdev->saved_raid_disk].rdev == NULL) in raid5_add_disk()
8359 first = rdev->saved_raid_disk; in raid5_add_disk()
8362 p = conf->disks + disk; in raid5_add_disk()
8363 if (p->rdev == NULL) { in raid5_add_disk()
8364 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8365 rdev->raid_disk = disk; in raid5_add_disk()
8366 if (rdev->saved_raid_disk != disk) in raid5_add_disk()
8367 conf->fullsync = 1; in raid5_add_disk()
8368 rcu_assign_pointer(p->rdev, rdev); in raid5_add_disk()
8370 err = log_modify(conf, rdev, true); in raid5_add_disk()
8376 p = conf->disks + disk; in raid5_add_disk()
8377 tmp = rdev_mdlock_deref(mddev, p->rdev); in raid5_add_disk()
8378 if (test_bit(WantReplacement, &tmp->flags) && in raid5_add_disk()
8379 mddev->reshape_position == MaxSector && in raid5_add_disk()
8380 p->replacement == NULL) { in raid5_add_disk()
8381 clear_bit(In_sync, &rdev->flags); in raid5_add_disk()
8382 set_bit(Replacement, &rdev->flags); in raid5_add_disk()
8383 rdev->raid_disk = disk; in raid5_add_disk()
8385 conf->fullsync = 1; in raid5_add_disk()
8386 rcu_assign_pointer(p->replacement, rdev); in raid5_add_disk()
8391 print_raid5_conf(conf); in raid5_add_disk()
8405 struct r5conf *conf = mddev->private; in raid5_resize() local
8407 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in raid5_resize()
8408 return -EINVAL; in raid5_resize()
8409 sectors &= ~((sector_t)conf->chunk_sectors - 1); in raid5_resize()
8410 newsize = raid5_size(mddev, sectors, mddev->raid_disks); in raid5_resize()
8411 if (mddev->external_size && in raid5_resize()
8412 mddev->array_sectors > newsize) in raid5_resize()
8413 return -EINVAL; in raid5_resize()
8414 if (mddev->bitmap) { in raid5_resize()
8415 int ret = md_bitmap_resize(mddev->bitmap, sectors, 0, 0); in raid5_resize()
8420 if (sectors > mddev->dev_sectors && in raid5_resize()
8421 mddev->recovery_cp > mddev->dev_sectors) { in raid5_resize()
8422 mddev->recovery_cp = mddev->dev_sectors; in raid5_resize()
8423 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); in raid5_resize()
8425 mddev->dev_sectors = sectors; in raid5_resize()
8426 mddev->resync_max_sectors = sectors; in raid5_resize()
8437 * If the chunk size is greater, user-space should request more in check_stripe_cache()
8440 struct r5conf *conf = mddev->private; in check_stripe_cache() local
8441 if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8442 > conf->min_nr_stripes || in check_stripe_cache()
8443 ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 in check_stripe_cache()
8444 > conf->min_nr_stripes) { in check_stripe_cache()
8447 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) in check_stripe_cache()
8448 / RAID5_STRIPE_SIZE(conf))*4); in check_stripe_cache()
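/*
 * Illustrative sketch (not from raid5.c): the reshape precondition in
 * check_stripe_cache() above in plain arithmetic - the stripe cache
 * must hold at least four chunks' worth of stripes for both the old
 * and new chunk size. Example geometry is assumed.
 */
#include <stdio.h>

int main(void)
{
	unsigned int chunk_sectors = 1024;	/* 512 KiB chunk */
	unsigned int stripe_size = 4096;	/* bytes per stripe page */
	int min_nr_stripes = 256;		/* current cache floor */

	unsigned int needed = ((chunk_sectors << 9) / stripe_size) * 4;

	printf("need %u stripes, have %d: %s\n", needed, min_nr_stripes,
	       needed > (unsigned int)min_nr_stripes ? "too small" : "ok");
	/* prints: need 512 stripes, have 256: too small */
	return 0;
}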
8456 struct r5conf *conf = mddev->private; in check_reshape() local
8458 if (raid5_has_log(conf) || raid5_has_ppl(conf)) in check_reshape()
8459 return -EINVAL; in check_reshape()
8460 if (mddev->delta_disks == 0 && in check_reshape()
8461 mddev->new_layout == mddev->layout && in check_reshape()
8462 mddev->new_chunk_sectors == mddev->chunk_sectors) in check_reshape()
8464 if (has_failed(conf)) in check_reshape()
8465 return -EINVAL; in check_reshape()
8466 if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) { in check_reshape()
8473 if (mddev->level == 6) in check_reshape()
8475 if (mddev->raid_disks + mddev->delta_disks < min) in check_reshape()
8476 return -EINVAL; in check_reshape()
8480 return -ENOSPC; in check_reshape()
8482 if (mddev->new_chunk_sectors > mddev->chunk_sectors || in check_reshape()
8483 mddev->delta_disks > 0) in check_reshape()
8484 if (resize_chunks(conf, in check_reshape()
8485 conf->previous_raid_disks in check_reshape()
8486 + max(0, mddev->delta_disks), in check_reshape()
8487 max(mddev->new_chunk_sectors, in check_reshape()
8488 mddev->chunk_sectors) in check_reshape()
8490 return -ENOMEM; in check_reshape()
8492 if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size) in check_reshape()
8494 return resize_stripes(conf, (conf->previous_raid_disks in check_reshape()
8495 + mddev->delta_disks)); in check_reshape()
8500 struct r5conf *conf = mddev->private; in raid5_start_reshape() local
8506 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) in raid5_start_reshape()
8507 return -EBUSY; in raid5_start_reshape()
8510 return -ENOSPC; in raid5_start_reshape()
8512 if (has_failed(conf)) in raid5_start_reshape()
8513 return -EINVAL; in raid5_start_reshape()
8516 if (mddev->recovery_cp < MaxSector) in raid5_start_reshape()
8517 return -EBUSY; in raid5_start_reshape()
8518 for (i = 0; i < conf->raid_disks; i++) in raid5_start_reshape()
8519 if (rdev_mdlock_deref(mddev, conf->disks[i].replacement)) in raid5_start_reshape()
8520 return -EBUSY; in raid5_start_reshape()
8523 if (!test_bit(In_sync, &rdev->flags) in raid5_start_reshape()
8524 && !test_bit(Faulty, &rdev->flags)) in raid5_start_reshape()
8528 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) in raid5_start_reshape()
8532 return -EINVAL; in raid5_start_reshape()
8538 if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) in raid5_start_reshape()
8539 < mddev->array_sectors) { in raid5_start_reshape()
8542 return -EINVAL; in raid5_start_reshape()
8545 atomic_set(&conf->reshape_stripes, 0); in raid5_start_reshape()
8546 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8547 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8548 conf->previous_raid_disks = conf->raid_disks; in raid5_start_reshape()
8549 conf->raid_disks += mddev->delta_disks; in raid5_start_reshape()
8550 conf->prev_chunk_sectors = conf->chunk_sectors; in raid5_start_reshape()
8551 conf->chunk_sectors = mddev->new_chunk_sectors; in raid5_start_reshape()
8552 conf->prev_algo = conf->algorithm; in raid5_start_reshape()
8553 conf->algorithm = mddev->new_layout; in raid5_start_reshape()
8554 conf->generation++; in raid5_start_reshape()
8556 * if reshape_progress has been set - so a memory barrier is needed. in raid5_start_reshape()
8559 if (mddev->reshape_backwards) in raid5_start_reshape()
8560 conf->reshape_progress = raid5_size(mddev, 0, 0); in raid5_start_reshape()
8562 conf->reshape_progress = 0; in raid5_start_reshape()
8563 conf->reshape_safe = conf->reshape_progress; in raid5_start_reshape()
8564 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8565 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8568 * the reshape wasn't running - like Discard or Read - have in raid5_start_reshape()
8581 if (mddev->delta_disks >= 0) { in raid5_start_reshape()
8583 if (rdev->raid_disk < 0 && in raid5_start_reshape()
8584 !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8586 if (rdev->raid_disk in raid5_start_reshape()
8587 >= conf->previous_raid_disks) in raid5_start_reshape()
8588 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8590 rdev->recovery_offset = 0; in raid5_start_reshape()
8595 } else if (rdev->raid_disk >= conf->previous_raid_disks in raid5_start_reshape()
8596 && !test_bit(Faulty, &rdev->flags)) { in raid5_start_reshape()
8598 set_bit(In_sync, &rdev->flags); in raid5_start_reshape()
8602 * ->degraded is measured against the larger of the in raid5_start_reshape()
8605 spin_lock_irqsave(&conf->device_lock, flags); in raid5_start_reshape()
8606 mddev->degraded = raid5_calc_degraded(conf); in raid5_start_reshape()
8607 spin_unlock_irqrestore(&conf->device_lock, flags); in raid5_start_reshape()
8609 mddev->raid_disks = conf->raid_disks; in raid5_start_reshape()
8610 mddev->reshape_position = conf->reshape_progress; in raid5_start_reshape()
8611 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_start_reshape()
8613 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); in raid5_start_reshape()
8614 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); in raid5_start_reshape()
8615 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); in raid5_start_reshape()
8616 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); in raid5_start_reshape()
8617 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); in raid5_start_reshape()
8618 rcu_assign_pointer(mddev->sync_thread, in raid5_start_reshape()
8620 if (!mddev->sync_thread) { in raid5_start_reshape()
8621 mddev->recovery = 0; in raid5_start_reshape()
8622 spin_lock_irq(&conf->device_lock); in raid5_start_reshape()
8623 write_seqcount_begin(&conf->gen_lock); in raid5_start_reshape()
8624 mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; in raid5_start_reshape()
8625 mddev->new_chunk_sectors = in raid5_start_reshape()
8626 conf->chunk_sectors = conf->prev_chunk_sectors; in raid5_start_reshape()
8627 mddev->new_layout = conf->algorithm = conf->prev_algo; in raid5_start_reshape()
8629 rdev->new_data_offset = rdev->data_offset; in raid5_start_reshape()
8631 conf->generation--; in raid5_start_reshape()
8632 conf->reshape_progress = MaxSector; in raid5_start_reshape()
8633 mddev->reshape_position = MaxSector; in raid5_start_reshape()
8634 write_seqcount_end(&conf->gen_lock); in raid5_start_reshape()
8635 spin_unlock_irq(&conf->device_lock); in raid5_start_reshape()
8636 return -EAGAIN; in raid5_start_reshape()
8638 conf->reshape_checkpoint = jiffies; in raid5_start_reshape()
8639 md_wakeup_thread(mddev->sync_thread); in raid5_start_reshape()
8645 * changes needed in 'conf'
8647 static void end_reshape(struct r5conf *conf) in end_reshape() argument
8650 if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { in end_reshape()
8653 spin_lock_irq(&conf->device_lock); in end_reshape()
8654 conf->previous_raid_disks = conf->raid_disks; in end_reshape()
8655 md_finish_reshape(conf->mddev); in end_reshape()
8657 conf->reshape_progress = MaxSector; in end_reshape()
8658 conf->mddev->reshape_position = MaxSector; in end_reshape()
8659 rdev_for_each(rdev, conf->mddev) in end_reshape()
8660 if (rdev->raid_disk >= 0 && in end_reshape()
8661 !test_bit(Journal, &rdev->flags) && in end_reshape()
8662 !test_bit(In_sync, &rdev->flags)) in end_reshape()
8663 rdev->recovery_offset = MaxSector; in end_reshape()
8664 spin_unlock_irq(&conf->device_lock); in end_reshape()
8665 wake_up(&conf->wait_for_overlap); in end_reshape()
8667 if (conf->mddev->queue) in end_reshape()
8668 raid5_set_io_opt(conf); in end_reshape()
8677 struct r5conf *conf = mddev->private; in raid5_finish_reshape() local
8680 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { in raid5_finish_reshape()
8682 if (mddev->delta_disks <= 0) { in raid5_finish_reshape()
8684 spin_lock_irq(&conf->device_lock); in raid5_finish_reshape()
8685 mddev->degraded = raid5_calc_degraded(conf); in raid5_finish_reshape()
8686 spin_unlock_irq(&conf->device_lock); in raid5_finish_reshape()
8687 for (d = conf->raid_disks; in raid5_finish_reshape()
8688 d < conf->raid_disks - mddev->delta_disks; in raid5_finish_reshape()
8691 conf->disks[d].rdev); in raid5_finish_reshape()
8693 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8695 conf->disks[d].replacement); in raid5_finish_reshape()
8697 clear_bit(In_sync, &rdev->flags); in raid5_finish_reshape()
8700 mddev->layout = conf->algorithm; in raid5_finish_reshape()
8701 mddev->chunk_sectors = conf->chunk_sectors; in raid5_finish_reshape()
8702 mddev->reshape_position = MaxSector; in raid5_finish_reshape()
8703 mddev->delta_disks = 0; in raid5_finish_reshape()
8704 mddev->reshape_backwards = 0; in raid5_finish_reshape()
8710 struct r5conf *conf = mddev->private; in raid5_quiesce() local
8714 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8718 r5c_flush_cache(conf, INT_MAX); in raid5_quiesce()
8722 smp_store_release(&conf->quiesce, 2); in raid5_quiesce()
8723 wait_event_cmd(conf->wait_for_quiescent, in raid5_quiesce()
8724 atomic_read(&conf->active_stripes) == 0 && in raid5_quiesce()
8725 atomic_read(&conf->active_aligned_reads) == 0, in raid5_quiesce()
8726 unlock_all_device_hash_locks_irq(conf), in raid5_quiesce()
8727 lock_all_device_hash_locks_irq(conf)); in raid5_quiesce()
8728 conf->quiesce = 1; in raid5_quiesce()
8729 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8731 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8733 /* re-enable writes */ in raid5_quiesce()
8734 lock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8735 conf->quiesce = 0; in raid5_quiesce()
8736 wake_up(&conf->wait_for_quiescent); in raid5_quiesce()
8737 wake_up(&conf->wait_for_overlap); in raid5_quiesce()
8738 unlock_all_device_hash_locks_irq(conf); in raid5_quiesce()
8740 log_quiesce(conf, quiesce); in raid5_quiesce()
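/*
 * Added note (not in the original source): raid5_quiesce() above moves
 * conf->quiesce through three values. 0 means normal operation; 2 is a
 * transient "draining" state published with smp_store_release() so new
 * activity stalls while existing work finishes; once active_stripes
 * and active_aligned_reads both reach zero the state is latched at 1
 * (fully quiesced). Un-quiescing stores 0 and wakes both wait queues.
 */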
8745 struct r0conf *raid0_conf = mddev->private; in raid45_takeover_raid0()
8749 if (raid0_conf->nr_strip_zones > 1) { in raid45_takeover_raid0()
8752 return ERR_PTR(-EINVAL); in raid45_takeover_raid0()
8755 sectors = raid0_conf->strip_zone[0].zone_end; in raid45_takeover_raid0()
8756 sector_div(sectors, raid0_conf->strip_zone[0].nb_dev); in raid45_takeover_raid0()
8757 mddev->dev_sectors = sectors; in raid45_takeover_raid0()
8758 mddev->new_level = level; in raid45_takeover_raid0()
8759 mddev->new_layout = ALGORITHM_PARITY_N; in raid45_takeover_raid0()
8760 mddev->new_chunk_sectors = mddev->chunk_sectors; in raid45_takeover_raid0()
8761 mddev->raid_disks += 1; in raid45_takeover_raid0()
8762 mddev->delta_disks = 1; in raid45_takeover_raid0()
8764 mddev->recovery_cp = MaxSector; in raid45_takeover_raid0()
8774 if (mddev->raid_disks != 2 || in raid5_takeover_raid1()
8775 mddev->degraded > 1) in raid5_takeover_raid1()
8776 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8778 /* Should check if there are write-behind devices? */ in raid5_takeover_raid1()
8783 while (chunksect && (mddev->array_sectors & (chunksect-1))) in raid5_takeover_raid1()
8786 if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private)) in raid5_takeover_raid1()
8788 return ERR_PTR(-EINVAL); in raid5_takeover_raid1()
8790 mddev->new_level = 5; in raid5_takeover_raid1()
8791 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; in raid5_takeover_raid1()
8792 mddev->new_chunk_sectors = chunksect; in raid5_takeover_raid1()
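/*
 * Illustrative sketch (not from raid5.c): the chunk-size derivation in
 * raid5_takeover_raid1() above - halve the candidate until it evenly
 * divides the array. The starting value and array size below are
 * made-up examples.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long array_sectors = 1048584ULL;	/* 512B units */
	unsigned int chunksect = 128;			/* 64 KiB candidate */

	while (chunksect && (array_sectors & (chunksect - 1)))
		chunksect >>= 1;

	if (chunksect)
		printf("chunk: %u sectors (%u KiB)\n",
		       chunksect, chunksect / 2);
	else
		printf("no suitable chunk size\n");
	/* prints: chunk: 8 sectors (4 KiB) */
	return 0;
}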
8805 switch (mddev->layout) { in raid5_takeover_raid6()
8825 return ERR_PTR(-EINVAL); in raid5_takeover_raid6()
8827 mddev->new_level = 5; in raid5_takeover_raid6()
8828 mddev->new_layout = new_layout; in raid5_takeover_raid6()
8829 mddev->delta_disks = -1; in raid5_takeover_raid6()
8830 mddev->raid_disks -= 1; in raid5_takeover_raid6()
8836 /* For a 2-drive array, the layout and chunk size can be changed in raid5_check_reshape()
8838 * For larger arrays we record the new value - after validation in raid5_check_reshape()
8841 struct r5conf *conf = mddev->private; in raid5_check_reshape() local
8842 int new_chunk = mddev->new_chunk_sectors; in raid5_check_reshape()
8844 if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) in raid5_check_reshape()
8845 return -EINVAL; in raid5_check_reshape()
8848 return -EINVAL; in raid5_check_reshape()
8850 return -EINVAL; in raid5_check_reshape()
8851 if (mddev->array_sectors & (new_chunk-1)) in raid5_check_reshape()
8853 return -EINVAL; in raid5_check_reshape()
8858 if (mddev->raid_disks == 2) { in raid5_check_reshape()
8860 if (mddev->new_layout >= 0) { in raid5_check_reshape()
8861 conf->algorithm = mddev->new_layout; in raid5_check_reshape()
8862 mddev->layout = mddev->new_layout; in raid5_check_reshape()
8865 conf->chunk_sectors = new_chunk; in raid5_check_reshape()
8866 mddev->chunk_sectors = new_chunk; in raid5_check_reshape()
8868 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); in raid5_check_reshape()
8869 md_wakeup_thread(mddev->thread); in raid5_check_reshape()
8876 int new_chunk = mddev->new_chunk_sectors; in raid6_check_reshape()
8878 if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) in raid6_check_reshape()
8879 return -EINVAL; in raid6_check_reshape()
8882 return -EINVAL; in raid6_check_reshape()
8884 return -EINVAL; in raid6_check_reshape()
8885 if (mddev->array_sectors & (new_chunk-1)) in raid6_check_reshape()
8887 return -EINVAL; in raid6_check_reshape()
8897 * raid0 - if there is only one strip zone - make it a raid4 layout in raid5_takeover()
8898 * raid1 - if there are two drives. We need to know the chunk size in raid5_takeover()
8899 * raid4 - trivial - just use a raid4 layout. in raid5_takeover()
8900 * raid6 - Providing it is a *_6 layout in raid5_takeover()
8902 if (mddev->level == 0) in raid5_takeover()
8904 if (mddev->level == 1) in raid5_takeover()
8906 if (mddev->level == 4) { in raid5_takeover()
8907 mddev->new_layout = ALGORITHM_PARITY_N; in raid5_takeover()
8908 mddev->new_level = 5; in raid5_takeover()
8911 if (mddev->level == 6) in raid5_takeover()
8914 return ERR_PTR(-EINVAL); in raid5_takeover()
8920 * raid0 - if there is only one strip zone in raid4_takeover()
8921 * raid5 - if layout is right in raid4_takeover()
8923 if (mddev->level == 0) in raid4_takeover()
8925 if (mddev->level == 5 && in raid4_takeover()
8926 mddev->layout == ALGORITHM_PARITY_N) { in raid4_takeover()
8927 mddev->new_layout = 0; in raid4_takeover()
8928 mddev->new_level = 4; in raid4_takeover()
8931 return ERR_PTR(-EINVAL); in raid4_takeover()
8944 if (mddev->pers != &raid5_personality) in raid6_takeover()
8945 return ERR_PTR(-EINVAL); in raid6_takeover()
8946 if (mddev->degraded > 1) in raid6_takeover()
8947 return ERR_PTR(-EINVAL); in raid6_takeover()
8948 if (mddev->raid_disks > 253) in raid6_takeover()
8949 return ERR_PTR(-EINVAL); in raid6_takeover()
8950 if (mddev->raid_disks < 3) in raid6_takeover()
8951 return ERR_PTR(-EINVAL); in raid6_takeover()
8953 switch (mddev->layout) { in raid6_takeover()
8973 return ERR_PTR(-EINVAL); in raid6_takeover()
8975 mddev->new_level = 6; in raid6_takeover()
8976 mddev->new_layout = new_layout; in raid6_takeover()
8977 mddev->delta_disks = 1; in raid6_takeover()
8978 mddev->raid_disks += 1; in raid6_takeover()
8984 struct r5conf *conf; in raid5_change_consistency_policy() local
8990 conf = mddev->private; in raid5_change_consistency_policy()
8991 if (!conf) { in raid5_change_consistency_policy()
8993 return -ENODEV; in raid5_change_consistency_policy()
8998 if (!raid5_has_ppl(conf) && conf->level == 5) { in raid5_change_consistency_policy()
8999 err = log_init(conf, NULL, true); in raid5_change_consistency_policy()
9001 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
9004 log_exit(conf); in raid5_change_consistency_policy()
9009 err = -EINVAL; in raid5_change_consistency_policy()
9011 if (raid5_has_ppl(conf)) { in raid5_change_consistency_policy()
9013 log_exit(conf); in raid5_change_consistency_policy()
9015 err = resize_stripes(conf, conf->pool_size); in raid5_change_consistency_policy()
9016 } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && in raid5_change_consistency_policy()
9017 r5l_log_disk_error(conf)) { in raid5_change_consistency_policy()
9022 if (test_bit(Journal, &rdev->flags)) { in raid5_change_consistency_policy()
9029 clear_bit(MD_HAS_JOURNAL, &mddev->flags); in raid5_change_consistency_policy()
9032 err = -EBUSY; in raid5_change_consistency_policy()
9034 err = -EINVAL; in raid5_change_consistency_policy()
9036 err = -EINVAL; in raid5_change_consistency_policy()
9049 struct r5conf *conf = mddev->private; in raid5_start() local
9051 return r5l_start(conf->log); in raid5_start()
9056 struct r5conf *conf = mddev->private; in raid5_prepare_suspend() local
9058 wait_event(mddev->sb_wait, !reshape_inprogress(mddev) || in raid5_prepare_suspend()
9059 percpu_ref_is_zero(&mddev->active_io)); in raid5_prepare_suspend()
9060 if (percpu_ref_is_zero(&mddev->active_io)) in raid5_prepare_suspend()
9067 wake_up(&conf->wait_for_overlap); in raid5_prepare_suspend()
9154 return -ENOMEM; in raid5_init()
9183 MODULE_ALIAS("md-personality-4"); /* RAID5 */
9184 MODULE_ALIAS("md-raid5");
9185 MODULE_ALIAS("md-raid4");
9186 MODULE_ALIAS("md-level-5");
9187 MODULE_ALIAS("md-level-4");
9188 MODULE_ALIAS("md-personality-8"); /* RAID6 */
9189 MODULE_ALIAS("md-raid6");
9190 MODULE_ALIAS("md-level-6");