Fenix @develop
 
Loading...
Searching...
No Matches
fenix_process_recovery.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5//
6// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7// _| _| _|_| _| _| _| _|
8// _|_|_| _|_|_| _| _| _| _| _|
9// _| _| _| _|_| _| _| _|
10// _| _|_|_|_| _| _| _|_|_| _| _|
11//
12//
13//
14//
15// Copyright (C) 2016 Rutgers University and Sandia Corporation
16//
17// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18// the U.S. Government retains certain rights in this software.
19//
20// Redistribution and use in source and binary forms, with or without
21// modification, are permitted provided that the following conditions are
22// met:
23//
24// 1. Redistributions of source code must retain the above copyright
25// notice, this list of conditions and the following disclaimer.
26//
27// 2. Redistributions in binary form must reproduce the above copyright
28// notice, this list of conditions and the following disclaimer in the
29// documentation and/or other materials provided with the distribution.
30//
31// 3. Neither the name of the Corporation nor the names of the
32// contributors may be used to endorse or promote products derived from
33// this software without specific prior written permission.
34//
35// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
36// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
39// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46//
47// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
48// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock
49//
50// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
51// Marc Gamell (mgamell@cac.rutgers.edu)
52//
53// ************************************************************************
54//@HEADER
55*/
56
57#ifndef __FENIX_PROCESS_RECOVERY__
58#define __FENIX_PROCESS_RECOVERY__
59
60#include <mpi.h>
61#include <setjmp.h>
62#include <stdio.h>
63#include <unistd.h>
64#include <stdlib.h>
65#include <stdarg.h>
66#include <stdint.h>
67#include <signal.h>
68
69#include "fenix_init.h"
70#include <functional>
71
// Integer resume codes. NOTE(review): given the <setjmp.h> include these are
// presumably the values used to decide where execution resumes after a
// recovery jump — confirm against the code that consumes them.
// __FENIX_RESUME_AT_INIT expands to 0.
72#define __FENIX_RESUME_AT_INIT 0
// __FENIX_RESUME_NO_JUMP expands to 200.
73#define __FENIX_RESUME_NO_JUMP 200
74
// Type-erased callback: any callable invocable as void(MPI_Comm, int).
// NOTE(review): the int is presumably an error code (compare the `error`
// parameter of __fenix_callback_invoke_all) — confirm against the definition.
75using fenix_callback_func = std::function<void(MPI_Comm, int)>;
76
77typedef struct __fenix_comm_list_elm {
78 struct __fenix_comm_list_elm *next;
79 struct __fenix_comm_list_elm *prev;
80 MPI_Comm *comm;
82
83typedef struct {
87
// Process-recovery entry points. Only the declarations are visible in this
// header, so the notes below state what the signatures establish; anything
// about behavior is marked as an assumption to confirm against the definitions.
// Takes no arguments, returns int (presumably a status code — TODO confirm).
88int __fenix_create_new_world();
89
// Takes no arguments, returns int (presumably a status code — TODO confirm).
90int __fenix_repair_ranks();
91
// Takes the callback by non-const lvalue reference, so the caller must pass a
// named fenix_callback_func object; a temporary or a bare lambda will not bind.
// Returns int (meaning not visible here).
92int __fenix_callback_register(fenix_callback_func& recover);
93
// Takes no arguments, returns int. NOTE(review): presumably removes the most
// recently registered callback — confirm.
94int __fenix_callback_pop();
95
// Takes an int `error` and returns nothing. NOTE(review): presumably forwards
// `error` to every registered callback — confirm.
96void __fenix_callback_invoke_all(int error);
97
// Returns int* from (int*, int, int); the parameters are unnamed here, and
// ownership of the returned pointer is not stated in this header — check the
// definition before freeing or retaining it.
98int* __fenix_get_fail_ranks(int *, int, int);
99
// Takes no arguments, returns int (meaning not visible here).
100int __fenix_spare_rank();
101
// Takes no arguments, returns the rank role as an int.
102int __fenix_get_rank_role();
103
// Takes the rank role as an int; valid values are not declared in this header.
104void __fenix_set_rank_role(int FenixRankRole);
105
// Takes an int flag `do_recovery`, returns int. NOTE(review): presumably the
// flag selects whether recovery is started when a failure is found — confirm.
106int __fenix_detect_failures(int do_recovery);
107
// Takes no arguments, returns nothing.
108void __fenix_finalize();
109
// Takes no arguments, returns nothing. NOTE(review): presumably the finalize
// path for spare ranks — confirm.
110void __fenix_finalize_spare();
111
// Variadic function whose parameter list (MPI_Comm*, int*, ...) has the same
// shape as an MPI communicator error-handler function. NOTE(review): presumably
// installed as an MPI error handler — confirm where it is registered.
112void __fenix_test_MPI(MPI_Comm*, int*, ...);
113
114#endif
Definition fenix_process_recovery.hpp:77
Definition fenix_process_recovery.hpp:83