• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2003 Sistina Software Limited.
3  * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
4  *
5  * This file is released under the GPL.
6  */
7 
8 #include <linux/dm-dirty-log.h>
9 #include <linux/dm-region-hash.h>
10 
11 #include <linux/ctype.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 
17 #include "dm.h"
18 
19 #define	DM_MSG_PREFIX	"region hash"
20 
21 /*-----------------------------------------------------------------
22  * Region hash
23  *
24  * The mirror splits itself up into discrete regions.  Each
25  * region can be in one of three states: clean, dirty,
26  * nosync.  There is no need to put clean regions in the hash.
27  *
28  * In addition to being present in the hash table a region _may_
29  * be present on one of three lists.
30  *
31  *   clean_regions: Regions on this list have no io pending to
32  *   them, they are in sync, we are no longer interested in them,
33  *   they are dull.  dm_rh_update_states() will remove them from the
34  *   hash table.
35  *
36  *   quiesced_regions: These regions have been spun down, ready
37  *   for recovery.  rh_recovery_start() will remove regions from
38  *   this list and hand them to kmirrord, which will schedule the
39  *   recovery io with kcopyd.
40  *
41  *   recovered_regions: Regions that kcopyd has successfully
42  *   recovered.  dm_rh_update_states() will now schedule any delayed
43  *   io, up the recovery_count, and remove the region from the
44  *   hash.
45  *
46  * There are 2 locks:
47  *   A rw spin lock 'hash_lock' protects just the hash table,
48  *   this is never held in write mode from interrupt context,
49  *   which I believe means that we only have to disable irqs when
50  *   doing a write lock.
51  *
52  *   An ordinary spin lock 'region_lock' that protects the three
53  *   lists in the region_hash, with the 'state', 'list' and
54  *   'delayed_bios' fields of the regions.  This is used from irq
55  *   context, so all other uses will have to suspend local irqs.
56  *---------------------------------------------------------------*/
57 struct dm_region_hash {
58 	uint32_t region_size;
59 	unsigned int region_shift;
60 
61 	/* holds persistent region state */
62 	struct dm_dirty_log *log;
63 
64 	/* hash table */
65 	rwlock_t hash_lock;
66 	unsigned int mask;
67 	unsigned int nr_buckets;
68 	unsigned int prime;
69 	unsigned int shift;
70 	struct list_head *buckets;
71 
72 	/*
73 	 * If there was a flush failure no regions can be marked clean.
74 	 */
75 	int flush_failure;
76 
77 	unsigned int max_recovery; /* Max # of regions to recover in parallel */
78 
79 	spinlock_t region_lock;
80 	atomic_t recovery_in_flight;
81 	struct list_head clean_regions;
82 	struct list_head quiesced_regions;
83 	struct list_head recovered_regions;
84 	struct list_head failed_recovered_regions;
85 	struct semaphore recovery_count;
86 
87 	mempool_t region_pool;
88 
89 	void *context;
90 	sector_t target_begin;
91 
92 	/* Callback function to schedule bios writes */
93 	void (*dispatch_bios)(void *context, struct bio_list *bios);
94 
95 	/* Callback function to wakeup callers worker thread. */
96 	void (*wakeup_workers)(void *context);
97 
98 	/* Callback function to wakeup callers recovery waiters. */
99 	void (*wakeup_all_recovery_waiters)(void *context);
100 };
101 
102 struct dm_region {
103 	struct dm_region_hash *rh;	/* FIXME: can we get rid of this ? */
104 	region_t key;
105 	int state;
106 
107 	struct list_head hash_list;
108 	struct list_head list;
109 
110 	atomic_t pending;
111 	struct bio_list delayed_bios;
112 };
113 
114 /*
115  * Conversion fns
116  */
dm_rh_sector_to_region(struct dm_region_hash * rh,sector_t sector)117 static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
118 {
119 	return sector >> rh->region_shift;
120 }
121 
dm_rh_region_to_sector(struct dm_region_hash * rh,region_t region)122 sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
123 {
124 	return region << rh->region_shift;
125 }
126 EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
127 
dm_rh_bio_to_region(struct dm_region_hash * rh,struct bio * bio)128 region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
129 {
130 	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
131 				      rh->target_begin);
132 }
133 EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
134 
dm_rh_region_context(struct dm_region * reg)135 void *dm_rh_region_context(struct dm_region *reg)
136 {
137 	return reg->rh->context;
138 }
139 EXPORT_SYMBOL_GPL(dm_rh_region_context);
140 
dm_rh_get_region_key(struct dm_region * reg)141 region_t dm_rh_get_region_key(struct dm_region *reg)
142 {
143 	return reg->key;
144 }
145 EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
146 
dm_rh_get_region_size(struct dm_region_hash * rh)147 sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
148 {
149 	return rh->region_size;
150 }
151 EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
152 
153 /*
154  * FIXME: shall we pass in a structure instead of all these args to
155  * dm_region_hash_create()????
156  */
/* Multiplier and right shift that dm_region_hash_create() feeds to rh_hash(). */
#define RH_HASH_MULT	2654435387U
#define RH_HASH_SHIFT	12

/* Minimum number of struct dm_region objects reserved in the mempool. */
#define MIN_REGIONS	64
/*
 * Allocate and initialise a region hash.
 *
 * @context: opaque pointer passed back to the three callbacks.
 * @dispatch_bios: called to hand delayed bios back to the caller.
 * @wakeup_workers: called to kick the caller's worker thread.
 * @wakeup_all_recovery_waiters: called when recovery_in_flight drops
 *	to zero.
 * @target_begin: sector subtracted from bio sectors before they are
 *	mapped to a region.
 * @max_recovery: maximum number of regions recovered in parallel.
 * @log: dirty log consulted for region state; it is destroyed by
 *	dm_region_hash_destroy().
 * @region_size: region size; region_shift is derived with __ffs().
 *	NOTE(review): the shift arithmetic only matches a division by
 *	region_size when it is a power of two - presumably enforced by
 *	the caller, confirm.
 * @nr_regions: number of regions, used only to size the hash table.
 *
 * Returns the new region hash, or ERR_PTR(-ENOMEM) if any allocation
 * fails.
 */
struct dm_region_hash *dm_region_hash_create(
		void *context, void (*dispatch_bios)(void *context,
						     struct bio_list *bios),
		void (*wakeup_workers)(void *context),
		void (*wakeup_all_recovery_waiters)(void *context),
		sector_t target_begin, unsigned int max_recovery,
		struct dm_dirty_log *log, uint32_t region_size,
		region_t nr_regions)
{
	/* Largest power of two that fits in an unsigned int. */
	const unsigned int top_bit = ~0u - (~0u >> 1);
	struct dm_region_hash *rh;
	unsigned int nr_buckets, max_buckets;
	size_t i;
	int ret;

	/*
	 * Calculate a suitable number of buckets for our hash
	 * table.
	 *
	 * Stop doubling once the top bit is reached: one more shift
	 * would wrap nr_buckets to 0 and the loop would never end when
	 * max_buckets is larger than top_bit.
	 */
	max_buckets = nr_regions >> 6;
	for (nr_buckets = 128u;
	     nr_buckets < max_buckets && nr_buckets < top_bit;
	     nr_buckets <<= 1)
		;
	nr_buckets >>= 1;

	rh = kzalloc(sizeof(*rh), GFP_KERNEL);
	if (!rh) {
		DMERR("unable to allocate region hash memory");
		return ERR_PTR(-ENOMEM);
	}

	rh->context = context;
	rh->dispatch_bios = dispatch_bios;
	rh->wakeup_workers = wakeup_workers;
	rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
	rh->target_begin = target_begin;
	rh->max_recovery = max_recovery;
	rh->log = log;
	rh->region_size = region_size;
	rh->region_shift = __ffs(region_size);
	rwlock_init(&rh->hash_lock);
	/* nr_buckets is a power of two, so this is a valid index mask. */
	rh->mask = nr_buckets - 1;
	rh->nr_buckets = nr_buckets;

	rh->shift = RH_HASH_SHIFT;
	rh->prime = RH_HASH_MULT;

	rh->buckets = vmalloc(array_size(nr_buckets, sizeof(*rh->buckets)));
	if (!rh->buckets) {
		DMERR("unable to allocate region hash bucket memory");
		kfree(rh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nr_buckets; i++)
		INIT_LIST_HEAD(rh->buckets + i);

	spin_lock_init(&rh->region_lock);
	/* No recovery slots until dm_rh_start_recovery() hands them out. */
	sema_init(&rh->recovery_count, 0);
	atomic_set(&rh->recovery_in_flight, 0);
	INIT_LIST_HEAD(&rh->clean_regions);
	INIT_LIST_HEAD(&rh->quiesced_regions);
	INIT_LIST_HEAD(&rh->recovered_regions);
	INIT_LIST_HEAD(&rh->failed_recovered_regions);
	rh->flush_failure = 0;

	ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
					sizeof(struct dm_region));
	if (ret) {
		vfree(rh->buckets);
		kfree(rh);
		rh = ERR_PTR(-ENOMEM);
	}

	return rh;
}
EXPORT_SYMBOL_GPL(dm_region_hash_create);
236 
dm_region_hash_destroy(struct dm_region_hash * rh)237 void dm_region_hash_destroy(struct dm_region_hash *rh)
238 {
239 	unsigned int h;
240 	struct dm_region *reg, *nreg;
241 
242 	BUG_ON(!list_empty(&rh->quiesced_regions));
243 	for (h = 0; h < rh->nr_buckets; h++) {
244 		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
245 					 hash_list) {
246 			BUG_ON(atomic_read(&reg->pending));
247 			mempool_free(reg, &rh->region_pool);
248 		}
249 	}
250 
251 	if (rh->log)
252 		dm_dirty_log_destroy(rh->log);
253 
254 	mempool_exit(&rh->region_pool);
255 	vfree(rh->buckets);
256 	kfree(rh);
257 }
258 EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
259 
dm_rh_dirty_log(struct dm_region_hash * rh)260 struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
261 {
262 	return rh->log;
263 }
264 EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
265 
/*
 * Compute the bucket index for @region: multiply by rh->prime, drop
 * the low rh->shift bits, truncate to unsigned int and mask to the
 * table size.
 */
static unsigned int rh_hash(struct dm_region_hash *rh, region_t region)
{
	unsigned int folded;

	folded = (unsigned int) ((region * rh->prime) >> rh->shift);

	return folded & rh->mask;
}
270 
/*
 * Search the bucket selected by rh_hash() for @region.  Returns the
 * matching entry or NULL.  Takes no locks itself; the callers in this
 * file hold hash_lock around it.
 */
static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
{
	struct list_head *head = rh->buckets + rh_hash(rh, region);
	struct dm_region *cur;

	list_for_each_entry(cur, head, hash_list) {
		if (cur->key != region)
			continue;

		return cur;
	}

	return NULL;
}
282 
__rh_insert(struct dm_region_hash * rh,struct dm_region * reg)283 static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
284 {
285 	list_add(&reg->hash_list, rh->buckets + rh_hash(rh, reg->key));
286 }
287 
/*
 * Create the hash entry for @region and return it.
 *
 * The new entry starts as DM_RH_CLEAN when the dirty log reports the
 * region in sync, DM_RH_NOSYNC otherwise.  If another entry for the
 * same region turns up in the table once hash_lock is write-held, the
 * freshly built one is discarded and the existing entry is returned.
 */
static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *fresh, *found;

	fresh = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
	if (unlikely(!fresh))
		fresh = kmalloc(sizeof(*fresh), GFP_NOIO | __GFP_NOFAIL);

	if (rh->log->type->in_sync(rh->log, region, 1))
		fresh->state = DM_RH_CLEAN;
	else
		fresh->state = DM_RH_NOSYNC;
	fresh->rh = rh;
	fresh->key = region;
	INIT_LIST_HEAD(&fresh->list);
	atomic_set(&fresh->pending, 0);
	bio_list_init(&fresh->delayed_bios);

	write_lock_irq(&rh->hash_lock);
	found = __rh_lookup(rh, region);
	if (!found) {
		__rh_insert(rh, fresh);
		if (fresh->state == DM_RH_CLEAN) {
			/* irqs are already off under write_lock_irq() */
			spin_lock(&rh->region_lock);
			list_add(&fresh->list, &rh->clean_regions);
			spin_unlock(&rh->region_lock);
		}

		found = fresh;
	} else {
		/* We lost the race. */
		mempool_free(fresh, &rh->region_pool);
	}
	write_unlock_irq(&rh->hash_lock);

	return found;
}
323 
/*
 * Return the hash entry for @region, creating it if necessary.
 *
 * Must be entered with hash_lock read-held.  When the entry has to be
 * created the read lock is dropped around __rh_alloc() and re-taken
 * before returning.
 */
static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg = __rh_lookup(rh, region);

	if (reg)
		return reg;

	read_unlock(&rh->hash_lock);
	reg = __rh_alloc(rh, region);
	read_lock(&rh->hash_lock);

	return reg;
}
337 
/*
 * Return the state of @region.
 *
 * A region present in the hash reports its cached state.  Otherwise
 * the dirty log's in_sync() is asked (with @may_block passed through)
 * and the answer is mapped to DM_RH_CLEAN or DM_RH_NOSYNC.
 */
int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
{
	struct dm_region *reg;
	int sync;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	if (!reg) {
		/*
		 * The region wasn't in the hash, so we fall back to the
		 * dirty log.
		 */
		sync = rh->log->type->in_sync(rh->log, region, may_block);

		/*
		 * Any error from the dirty log (eg. -EWOULDBLOCK) gets
		 * taken as a DM_RH_NOSYNC
		 */
		if (sync == 1)
			return DM_RH_CLEAN;

		return DM_RH_NOSYNC;
	}

	return reg->state;
}
EXPORT_SYMBOL_GPL(dm_rh_get_state);
363 
complete_resync_work(struct dm_region * reg,int success)364 static void complete_resync_work(struct dm_region *reg, int success)
365 {
366 	struct dm_region_hash *rh = reg->rh;
367 
368 	rh->log->type->set_region_sync(rh->log, reg->key, success);
369 
370 	/*
371 	 * Dispatch the bios before we call 'wake_up_all'.
372 	 * This is important because if we are suspending,
373 	 * we want to know that recovery is complete and
374 	 * the work queue is flushed.  If we wake_up_all
375 	 * before we dispatch_bios (queue bios and call wake()),
376 	 * then we risk suspending before the work queue
377 	 * has been properly flushed.
378 	 */
379 	rh->dispatch_bios(rh->context, &reg->delayed_bios);
380 	if (atomic_dec_and_test(&rh->recovery_in_flight))
381 		rh->wakeup_all_recovery_waiters(rh->context);
382 	up(&rh->recovery_count);
383 }
384 
385 /* dm_rh_mark_nosync
386  * @ms
387  * @bio
388  *
389  * The bio was written on some mirror(s) but failed on other mirror(s).
390  * We can successfully endio the bio but should avoid the region being
391  * marked clean by setting the state DM_RH_NOSYNC.
392  *
393  * This function is _not_ safe in interrupt context!
394  */
dm_rh_mark_nosync(struct dm_region_hash * rh,struct bio * bio)395 void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
396 {
397 	unsigned long flags;
398 	struct dm_dirty_log *log = rh->log;
399 	struct dm_region *reg;
400 	region_t region = dm_rh_bio_to_region(rh, bio);
401 	int recovering = 0;
402 
403 	if (bio->bi_opf & REQ_PREFLUSH) {
404 		rh->flush_failure = 1;
405 		return;
406 	}
407 
408 	if (bio_op(bio) == REQ_OP_DISCARD)
409 		return;
410 
411 	/* We must inform the log that the sync count has changed. */
412 	log->type->set_region_sync(log, region, 0);
413 
414 	read_lock(&rh->hash_lock);
415 	reg = __rh_find(rh, region);
416 	read_unlock(&rh->hash_lock);
417 
418 	/* region hash entry should exist because write was in-flight */
419 	BUG_ON(!reg);
420 	BUG_ON(!list_empty(&reg->list));
421 
422 	spin_lock_irqsave(&rh->region_lock, flags);
423 	/*
424 	 * Possible cases:
425 	 *   1) DM_RH_DIRTY
426 	 *   2) DM_RH_NOSYNC: was dirty, other preceding writes failed
427 	 *   3) DM_RH_RECOVERING: flushing pending writes
428 	 * Either case, the region should have not been connected to list.
429 	 */
430 	recovering = (reg->state == DM_RH_RECOVERING);
431 	reg->state = DM_RH_NOSYNC;
432 	BUG_ON(!list_empty(&reg->list));
433 	spin_unlock_irqrestore(&rh->region_lock, flags);
434 
435 	if (recovering)
436 		complete_resync_work(reg, 0);
437 }
438 EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
439 
dm_rh_update_states(struct dm_region_hash * rh,int errors_handled)440 void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
441 {
442 	struct dm_region *reg, *next;
443 
444 	LIST_HEAD(clean);
445 	LIST_HEAD(recovered);
446 	LIST_HEAD(failed_recovered);
447 
448 	/*
449 	 * Quickly grab the lists.
450 	 */
451 	write_lock_irq(&rh->hash_lock);
452 	spin_lock(&rh->region_lock);
453 	if (!list_empty(&rh->clean_regions)) {
454 		list_splice_init(&rh->clean_regions, &clean);
455 
456 		list_for_each_entry(reg, &clean, list)
457 			list_del(&reg->hash_list);
458 	}
459 
460 	if (!list_empty(&rh->recovered_regions)) {
461 		list_splice_init(&rh->recovered_regions, &recovered);
462 
463 		list_for_each_entry(reg, &recovered, list)
464 			list_del(&reg->hash_list);
465 	}
466 
467 	if (!list_empty(&rh->failed_recovered_regions)) {
468 		list_splice_init(&rh->failed_recovered_regions,
469 				 &failed_recovered);
470 
471 		list_for_each_entry(reg, &failed_recovered, list)
472 			list_del(&reg->hash_list);
473 	}
474 
475 	spin_unlock(&rh->region_lock);
476 	write_unlock_irq(&rh->hash_lock);
477 
478 	/*
479 	 * All the regions on the recovered and clean lists have
480 	 * now been pulled out of the system, so no need to do
481 	 * any more locking.
482 	 */
483 	list_for_each_entry_safe(reg, next, &recovered, list) {
484 		rh->log->type->clear_region(rh->log, reg->key);
485 		complete_resync_work(reg, 1);
486 		mempool_free(reg, &rh->region_pool);
487 	}
488 
489 	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
490 		complete_resync_work(reg, errors_handled ? 0 : 1);
491 		mempool_free(reg, &rh->region_pool);
492 	}
493 
494 	list_for_each_entry_safe(reg, next, &clean, list) {
495 		rh->log->type->clear_region(rh->log, reg->key);
496 		mempool_free(reg, &rh->region_pool);
497 	}
498 
499 	rh->log->type->flush(rh->log);
500 }
501 EXPORT_SYMBOL_GPL(dm_rh_update_states);
502 
/*
 * Account one more pending I/O against @region.  A region that was
 * DM_RH_CLEAN becomes DM_RH_DIRTY, leaves the clean list and is marked
 * in the dirty log.  hash_lock stays read-held for the whole function.
 */
static void rh_inc(struct dm_region_hash *rh, region_t region)
{
	struct dm_region *reg;
	int was_clean;

	read_lock(&rh->hash_lock);
	reg = __rh_find(rh, region);

	spin_lock_irq(&rh->region_lock);
	atomic_inc(&reg->pending);

	was_clean = (reg->state == DM_RH_CLEAN);
	if (was_clean) {
		reg->state = DM_RH_DIRTY;
		list_del_init(&reg->list);	/* take off the clean list */
	}
	spin_unlock_irq(&rh->region_lock);

	/* The log is updated only after region_lock has been released. */
	if (was_clean)
		rh->log->type->mark_region(rh->log, reg->key);

	read_unlock(&rh->hash_lock);
}
525 
dm_rh_inc_pending(struct dm_region_hash * rh,struct bio_list * bios)526 void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
527 {
528 	struct bio *bio;
529 
530 	for (bio = bios->head; bio; bio = bio->bi_next) {
531 		if (bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
532 			continue;
533 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
534 	}
535 }
536 EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
537 
/*
 * Drop one pending I/O from @region.
 *
 * When the count reaches zero the region is queued for its next step
 * and the caller's worker is woken:
 *   - after a flush failure the region is forced to DM_RH_NOSYNC;
 *   - a DM_RH_RECOVERING region goes on the quiesced list;
 *   - a DM_RH_DIRTY region becomes DM_RH_CLEAN and goes on the clean
 *     list;
 *   - a DM_RH_NOSYNC region is left alone, i.e. kept off the clean
 *     list.  Its hash entry remains in memory until the region is
 *     recovered or the map is reloaded.
 *
 * The region is looked up, not created.
 * NOTE(review): a missing entry would be dereferenced - callers
 * presumably always pair this with an earlier increment; confirm.
 */
void dm_rh_dec(struct dm_region_hash *rh, region_t region)
{
	unsigned long irq_flags;
	struct dm_region *reg;
	int idle;

	read_lock(&rh->hash_lock);
	reg = __rh_lookup(rh, region);
	read_unlock(&rh->hash_lock);

	spin_lock_irqsave(&rh->region_lock, irq_flags);
	idle = atomic_dec_and_test(&reg->pending);
	if (idle) {
		/*
		 * There is no pending I/O for this region.
		 * At this point, the region is not yet connected to any list.
		 */
		if (unlikely(rh->flush_failure)) {
			/*
			 * If a write flush failed some time ago, we
			 * don't know whether or not this write made it
			 * to the disk, so we must resync the device.
			 */
			reg->state = DM_RH_NOSYNC;
		} else if (reg->state == DM_RH_RECOVERING) {
			list_add_tail(&reg->list, &rh->quiesced_regions);
		} else if (reg->state == DM_RH_DIRTY) {
			reg->state = DM_RH_CLEAN;
			list_add(&reg->list, &rh->clean_regions);
		}
	}
	spin_unlock_irqrestore(&rh->region_lock, irq_flags);

	if (idle)
		rh->wakeup_workers(rh->context);
}
EXPORT_SYMBOL_GPL(dm_rh_dec);
583 
584 /*
585  * Starts quiescing a region in preparation for recovery.
586  */
__rh_recovery_prepare(struct dm_region_hash * rh)587 static int __rh_recovery_prepare(struct dm_region_hash *rh)
588 {
589 	int r;
590 	region_t region;
591 	struct dm_region *reg;
592 
593 	/*
594 	 * Ask the dirty log what's next.
595 	 */
596 	r = rh->log->type->get_resync_work(rh->log, &region);
597 	if (r <= 0)
598 		return r;
599 
600 	/*
601 	 * Get this region, and start it quiescing by setting the
602 	 * recovering flag.
603 	 */
604 	read_lock(&rh->hash_lock);
605 	reg = __rh_find(rh, region);
606 	read_unlock(&rh->hash_lock);
607 
608 	spin_lock_irq(&rh->region_lock);
609 	reg->state = DM_RH_RECOVERING;
610 
611 	/* Already quiesced ? */
612 	if (atomic_read(&reg->pending))
613 		list_del_init(&reg->list);
614 	else
615 		list_move(&reg->list, &rh->quiesced_regions);
616 
617 	spin_unlock_irq(&rh->region_lock);
618 
619 	return 1;
620 }
621 
dm_rh_recovery_prepare(struct dm_region_hash * rh)622 void dm_rh_recovery_prepare(struct dm_region_hash *rh)
623 {
624 	/* Extra reference to avoid race with dm_rh_stop_recovery */
625 	atomic_inc(&rh->recovery_in_flight);
626 
627 	while (!down_trylock(&rh->recovery_count)) {
628 		atomic_inc(&rh->recovery_in_flight);
629 		if (__rh_recovery_prepare(rh) <= 0) {
630 			atomic_dec(&rh->recovery_in_flight);
631 			up(&rh->recovery_count);
632 			break;
633 		}
634 	}
635 
636 	/* Drop the extra reference */
637 	if (atomic_dec_and_test(&rh->recovery_in_flight))
638 		rh->wakeup_all_recovery_waiters(rh->context);
639 }
640 EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
641 
642 /*
643  * Returns any quiesced regions.
644  */
dm_rh_recovery_start(struct dm_region_hash * rh)645 struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
646 {
647 	struct dm_region *reg = NULL;
648 
649 	spin_lock_irq(&rh->region_lock);
650 	if (!list_empty(&rh->quiesced_regions)) {
651 		reg = list_entry(rh->quiesced_regions.next,
652 				 struct dm_region, list);
653 		list_del_init(&reg->list);  /* remove from the quiesced list */
654 	}
655 	spin_unlock_irq(&rh->region_lock);
656 
657 	return reg;
658 }
659 EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
660 
dm_rh_recovery_end(struct dm_region * reg,int success)661 void dm_rh_recovery_end(struct dm_region *reg, int success)
662 {
663 	struct dm_region_hash *rh = reg->rh;
664 
665 	spin_lock_irq(&rh->region_lock);
666 	if (success)
667 		list_add(&reg->list, &reg->rh->recovered_regions);
668 	else
669 		list_add(&reg->list, &reg->rh->failed_recovered_regions);
670 
671 	spin_unlock_irq(&rh->region_lock);
672 
673 	rh->wakeup_workers(rh->context);
674 }
675 EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
676 
677 /* Return recovery in flight count. */
dm_rh_recovery_in_flight(struct dm_region_hash * rh)678 int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
679 {
680 	return atomic_read(&rh->recovery_in_flight);
681 }
682 EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
683 
dm_rh_flush(struct dm_region_hash * rh)684 int dm_rh_flush(struct dm_region_hash *rh)
685 {
686 	return rh->log->type->flush(rh->log);
687 }
688 EXPORT_SYMBOL_GPL(dm_rh_flush);
689 
dm_rh_delay(struct dm_region_hash * rh,struct bio * bio)690 void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
691 {
692 	struct dm_region *reg;
693 
694 	read_lock(&rh->hash_lock);
695 	reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
696 	bio_list_add(&reg->delayed_bios, bio);
697 	read_unlock(&rh->hash_lock);
698 }
699 EXPORT_SYMBOL_GPL(dm_rh_delay);
700 
dm_rh_stop_recovery(struct dm_region_hash * rh)701 void dm_rh_stop_recovery(struct dm_region_hash *rh)
702 {
703 	int i;
704 
705 	/* wait for any recovering regions */
706 	for (i = 0; i < rh->max_recovery; i++)
707 		down(&rh->recovery_count);
708 }
709 EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
710 
dm_rh_start_recovery(struct dm_region_hash * rh)711 void dm_rh_start_recovery(struct dm_region_hash *rh)
712 {
713 	int i;
714 
715 	for (i = 0; i < rh->max_recovery; i++)
716 		up(&rh->recovery_count);
717 
718 	rh->wakeup_workers(rh->context);
719 }
720 EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
721 
/* Module metadata: description, author and license declarations. */
722 MODULE_DESCRIPTION(DM_NAME " region hash");
723 MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
724 MODULE_LICENSE("GPL");
725