Fenix @develop
 
Loading...
Searching...
No Matches
fenix_ext.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5//
6// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7// _| _| _|_| _| _| _| _|
8// _|_|_| _|_|_| _| _| _| _| _|
9// _| _| _| _|_| _| _| _|
10// _| _|_|_|_| _| _| _|_|_| _| _|
11//
12//
13//
14//
15// Copyright (C) 2016 Rutgers University and Sandia Corporation
16//
17// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18// the U.S. Government retains certain rights in this software.
19//
20// Redistribution and use in source and binary forms, with or without
21// modification, are permitted provided that the following conditions are
22// met:
23//
24// 1. Redistributions of source code must retain the above copyright
25// notice, this list of conditions and the following disclaimer.
26//
27// 2. Redistributions in binary form must reproduce the above copyright
28// notice, this list of conditions and the following disclaimer in the
29// documentation and/or other materials provided with the distribution.
30//
31// 3. Neither the name of the Corporation nor the names of the
32// contributors may be used to endorse or promote products derived from
33// this software without specific prior written permission.
34//
35// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
36// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
39// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46//
47// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
48// Michael Heroux, and Matthew Whitlock
49//
50// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
51// Marc Gamell (mgamell@cac.rutgers.edu)
52//
53// ************************************************************************
54//@HEADER
55*/
56
57#ifndef __FENIX_EXT_H__
58#define __FENIX_EXT_H__
59
60#include <mpi.h>
61#include <vector>
62#include "fenix.h"
63#include "fenix_opt.hpp"
64#include "fenix_data_group.hpp"
65#include "fenix_process_recovery.hpp"
66
67typedef struct {
68 int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init
69 int num_survivor_ranks; // Keeps the global information on the number of survived MPI ranks after failure
70 int num_recovered_ranks; // Keeps the number of spare ranks brought into MPI communicator recovery
71 int resume_mode; // Defines how program resumes after process recovery
72 int spawn_policy; // Indicate dynamic process spawning
73 int spare_ranks; // Spare ranks entered by user to repair failed ranks
74 int repair_result; // Internal global variable to store the result of MPI communicator repair
75 int finalized;
76 jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure
77
78
79 //enum FenixRankRole role; // Role of rank: initial, survivor or repair
80 int role; // Role of rank: initial, survivor or repair
81 int fenix_init_flag = 0;
82
83 int fail_world_size;
84 int* fail_world;
85
86 //Save the pointer to role and error of Fenix_Init
87 int *ret_role;
88 int *ret_error;
89
90 std::vector<fenix_callback_func> callbacks;
91 fenix_debug_opt_t options; // This is reserved to store the user options
92
93 MPI_Comm *world; // Duplicate of the MPI communicator provided by user
94 MPI_Comm new_world; // Global MPI communicator identical to g_world but without spare ranks
95 MPI_Comm *user_world; // MPI communicator with repaired ranks
96 //Manage state of the comms. Necessary when failures happen rapidly, mussing up state
97 int new_world_exists, user_world_exists;
98
99 int dummy_recv_buffer;
100 MPI_Request check_failures_req;
101
102
103 MPI_Op agree_op; // This is reserved for the global agreement call for Fenix data recovery API
104
105
106 MPI_Errhandler mpi_errhandler; // This stores callback info for our custom error handler
107 int ignore_errs; // Set this to return errors instead of using the error handler normally. (Don't forget to unset!)
108 int print_unhandled; // Set this to print the error string for MPI errors of an unhandled return type.
109
110 fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure
111} fenix_t;
112
113inline fenix_t fenix_rt;
114#endif // __FENIX_EXT_H__
115
Contains all API function calls and Fenix types. This is the only header file a user should include.
Definition fenix_data_group.hpp:146
Definition fenix_opt.hpp:80
Definition fenix_ext.hpp:67