• Home
  • Raw
  • Download

Lines Matching full:recovery

7  * recovery stuff
136 /* Worker function used during recovery. */
173 * RECOVERY THREAD
178 /* wake the recovery thread in dlm_kick_recovery_thread()
180 * 1) sleeping with no recovery happening in dlm_kick_recovery_thread()
181 * 2) sleeping with recovery mastered elsewhere in dlm_kick_recovery_thread()
182 * 3) recovery mastered here, waiting on reco data */ in dlm_kick_recovery_thread()
187 /* Launch the recovery thread */
190 mlog(0, "starting dlm recovery thread...\n"); in dlm_launch_recovery_thread()
206 mlog(0, "waiting for dlm recovery thread to exit\n"); in dlm_complete_recovery_thread()
215 * this is lame, but here's how recovery works...
216 * 1) all recovery threads cluster wide will work on recovering
222 * 4) each of these locks should be locked until recovery is done
232 * everything and recovery for this dead node is done
242 mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n", in dlm_print_reco_node_status()
310 mlog(0, "quitting DLM recovery thread\n"); in dlm_recovery_thread()
314 /* returns true when the recovery master has contacted us */
369 printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in " in dlm_wait_for_node_recovery()
382 * block on the dlm->reco.event when recovery is in progress.
383 * the dlm recovery thread will set this state when it begins
386 * been marked with the RECOVERY flag */
400 mlog(0, "%s: reco thread %d in recovery: " in dlm_wait_for_recovery()
413 printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", in dlm_begin_recovery()
424 printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); in dlm_end_recovery()
430 printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " in dlm_print_recovery_master()
444 mlog(0, "%s: no need do recovery after migrating all " in dlm_do_recovery()
470 mlog(ML_ERROR, "dead_node %u no longer in recovery map!\n", in dlm_do_recovery()
481 mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", in dlm_do_recovery()
498 * or recovery completes entirely. */ in dlm_do_recovery()
504 mlog(0, "another node will master this recovery session.\n"); in dlm_do_recovery()
511 * have been marked as in-recovery */ in dlm_do_recovery()
529 /* success! see if any other nodes need recovery */ in dlm_do_recovery()
530 mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", in dlm_do_recovery()
552 /* we have become recovery master. there is no escaping in dlm_remaster_locks()
556 mlog(ML_ERROR, "%s: failed to alloc recovery area, " in dlm_remaster_locks()
582 /* node died, ignore it for recovery */ in dlm_remaster_locks()
598 "%d during recovery, retrying " in dlm_remaster_locks()
616 "recovery info for node %u\n", in dlm_remaster_locks()
623 mlog(0, "now receiving recovery data from " in dlm_remaster_locks()
628 mlog(0, "already receiving recovery data from " in dlm_remaster_locks()
633 mlog(0, "already DONE receiving recovery data " in dlm_remaster_locks()
652 mlog(0, "checking recovery state of node %u\n", in dlm_remaster_locks()
664 "requesting recovery info for " in dlm_remaster_locks()
693 /* Set this flag on recovery master to avoid in dlm_remaster_locks()
694 * a new recovery for another dead node start in dlm_remaster_locks()
695 * before the recovery is not done. That may in dlm_remaster_locks()
696 * cause recovery hung.*/ in dlm_remaster_locks()
713 mlog(0, "should be done with recovery!\n"); in dlm_remaster_locks()
715 mlog(0, "finishing recovery of %s at %lu, " in dlm_remaster_locks()
887 mlog(0, "%s: recovery worker started, dead=%u, master=%u\n", in dlm_request_all_locks_worker()
892 /* worker could have been created before the recovery master in dlm_request_all_locks_worker()
895 mlog(ML_NOTICE, "%s: will not send recovery state, " in dlm_request_all_locks_worker()
896 "recovery master %u died, thread=(dead=%u,mas=%u)" in dlm_request_all_locks_worker()
913 * can safely move UNKNOWN lock resources for each recovery in dlm_request_all_locks_worker()
926 "recovery state for dead node %u, ret=%d\n", dlm->name, in dlm_request_all_locks_worker()
942 "recovery all-done for dead node %u, ret=%d\n", in dlm_request_all_locks_worker()
1014 * the line of recovery */ in dlm_reco_data_done_handler()
1020 "recovery data!\n", in dlm_reco_data_done_handler()
1030 /* wake the recovery thread, some node is done */ in dlm_reco_data_done_handler()
1035 mlog(ML_ERROR, "failed to find recovery node data for node " in dlm_reco_data_done_handler()
1052 /* always prune any $RECOVERY entries for dead nodes, in dlm_move_reco_locks_to_list()
1053 * otherwise hangs can occur during later recovery */ in dlm_move_reco_locks_to_list()
1060 "a $RECOVERY lock for dead " in dlm_move_reco_locks_to_list()
1077 "doing recovery for node %u. sending it.\n", in dlm_move_reco_locks_to_list()
1081 mlog(0, "found UNKNOWN owner while doing recovery " in dlm_move_reco_locks_to_list()
1128 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery", in dlm_send_mig_lockres_msg()
1142 "migration" : "recovery")); in dlm_send_mig_lockres_msg()
1317 send_to, flags & DLM_MRES_RECOVERY ? "recovery" : in dlm_send_one_lockres()
1334 flags & DLM_MRES_RECOVERY ? "recovery" : "migration", in dlm_send_one_lockres()
1343 * recovery data, and it will work on only one lockres.
1350 * we really cannot afford to fail an alloc in recovery
1390 "recovery" : "migration", mres->master); in dlm_mig_lockres_handler()
1402 * and RECOVERY flag changed when it completes. */ in dlm_mig_lockres_handler()
1499 * or when a lock is added by the recovery worker */ in dlm_mig_lockres_handler()
1504 mlog(0, "recovery has passed me a lockres with an " in dlm_mig_lockres_handler()
1983 * wrt lock queue ordering and recovery: in dlm_process_recovery_data()
2065 "Recovering res %s:%.*s, is already on recovery list!\n", in dlm_move_lockres_to_recovery_list()
2070 /* We need to hold a reference while on the recovery list */ in dlm_move_lockres_to_recovery_list()
2108 * recovery master. */ in dlm_move_lockres_to_recovery_list()
2134 /* removes all recovered locks from the recovery list.
2136 * unsets the RECOVERY flag and wakes waiters. */
2154 * the lock state sent during recovery */ in dlm_finish_local_lockres_recovery()
2192 * the lock state sent during recovery */ in dlm_finish_local_lockres_recovery()
2351 * dead node. once recovery finishes, the dlm thread in dlm_do_local_recovery_cleanup()
2358 /* always prune any $RECOVERY entries for dead nodes, in dlm_do_local_recovery_cleanup()
2359 * otherwise hangs can occur during later recovery */ in dlm_do_local_recovery_cleanup()
2366 "a $RECOVERY lock for dead " in dlm_do_local_recovery_cleanup()
2434 mlog(0, "%s: recovery master %d just died\n", in __dlm_hb_node_down()
2438 * the new_master and dead_node. that recovery in __dlm_hb_node_down()
2456 "another node likely did recovery already.\n", in __dlm_hb_node_down()
2528 mlog(0, "ast for recovery lock fired!, this=%u, dlm=%s\n", in dlm_reco_ast()
2534 mlog(0, "bast for recovery lock fired!, this=%u, dlm=%s\n", in dlm_reco_bast()
2539 mlog(0, "unlockast for recovery lock fired!\n"); in dlm_reco_unlock_ast()
2544 * dlmlock() on the special "$RECOVERY" lockres with the
2546 * this function on each node racing to become the recovery
2548 * a) this node gets the EX (and becomes the recovery master),
2551 * so each time a recovery master is needed, the entire cluster
2560 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", in dlm_pick_recovery_master()
2569 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", in dlm_pick_recovery_master()
2580 "do the recovery\n", dlm->name, in dlm_pick_recovery_master()
2586 /* see if recovery was already finished elsewhere */ in dlm_pick_recovery_master()
2602 /* if this node has actually become the recovery master, in dlm_pick_recovery_master()
2603 * set the master and send the messages to begin recovery */ in dlm_pick_recovery_master()
2619 /* recovery lock is a special case. ast will not get fired, in dlm_pick_recovery_master()
2661 mlog(ML_ERROR, "%s: got %s from dlmlock($RECOVERY), " in dlm_pick_recovery_master()
2670 mlog(ML_ERROR, "recovery lock not found\n"); in dlm_pick_recovery_master()
2719 /* node is down. not involved in recovery in dlm_send_begin_reco_message()
2732 mlog(0, "%s: trying to start recovery of node " in dlm_send_begin_reco_message()
2733 "%u, but node %u is waiting for last recovery " in dlm_send_begin_reco_message()
2753 mlog(ML_ERROR, "recovery lock not found\n"); in dlm_send_begin_reco_message()
2814 mlog(0, "recovery master %u sees %u as dead, but this " in dlm_begin_reco_handler()
2822 /* force the recovery cleanup in __dlm_hb_node_down in dlm_begin_reco_handler()
2832 mlog(0, "%s: recovery started by node %u, for %u (%u:%u)\n", in dlm_begin_reco_handler()
2850 mlog(0, "finishing recovery for node %s:%u, " in dlm_send_finalize_reco_message()
2876 /* this has no effect on this recovery in dlm_send_finalize_reco_message()
2878 * finish out the last recovery */ in dlm_send_finalize_reco_message()
2880 "node finished recovery.\n", nodenum); in dlm_send_finalize_reco_message()
2911 mlog(0, "%s: node %u finalizing recovery stage%d of " in dlm_finalize_reco_handler()
2918 mlog(ML_ERROR, "node %u sent recovery finalize msg, but node " in dlm_finalize_reco_handler()
2924 mlog(ML_ERROR, "node %u sent recovery finalize msg for dead " in dlm_finalize_reco_handler()
2960 mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n", in dlm_finalize_reco_handler()