Fenix @develop
 
Loading...
Searching...
No Matches
fenix_data_policy_in_memory_raid.hpp
1/*
2//@HEADER
3// ************************************************************************
4//
5//
6// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7// _| _| _|_| _| _| _| _|
8// _|_|_| _|_|_| _| _| _| _| _|
9// _| _| _| _|_| _| _| _|
10// _| _|_|_|_| _| _| _|_|_| _| _|
11//
12//
13//
14//
15// Copyright (C) 2016 Rutgers University and Sandia Corporation
16//
17// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18// the U.S. Government retains certain rights in this software.
19//
20// Redistribution and use in source and binary forms, with or without
21// modification, are permitted provided that the following conditions are
22// met:
23//
24// 1. Redistributions of source code must retain the above copyright
25// notice, this list of conditions and the following disclaimer.
26//
27// 2. Redistributions in binary form must reproduce the above copyright
28// notice, this list of conditions and the following disclaimer in the
29// documentation and/or other materials provided with the distribution.
30//
31// 3. Neither the name of the Corporation nor the names of the
32// contributors may be used to endorse or promote products derived from
33// this software without specific prior written permission.
34//
35// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
36// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
39// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46//
47// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
48// Michael Heroux, and Matthew Whitlock
49//
50// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
51// Marc Gamell (mgamell@cac.rutgers.edu)
52//
53// ************************************************************************
54//@HEADER
55*/
56
57#ifndef __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__
58#define __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__
59
#include <mpi.h>

#include <cassert>
#include <deque>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "fenix_data_group.hpp"
#include "fenix_data_buffer.hpp"
#include "fenix_data_subset.hpp"
#include "fenix/tasks/task.hpp"
70
71namespace fenix::data::imr {
72
73struct Entry {
74 //No copying, must be moved
75 Entry(const Entry&) = delete;
76 Entry(Entry&&);
77 Entry& operator=(Entry&&);
78
79 Entry(int size, int max_count);
80
81 //Re-initializes
82 void reset();
83
84 //Get raw buffer pointer
85 char* data();
86 //Get buffer size
87 int size();
88 //Resize buffer
89 void resize(int size);
90 //Add subset to region and ensure buf is large enough.
91 void add_and_fit(const DataSubset& subset);
92
93 DataBuffer buf;
94 DataSubset region;
95
96 char* partner_data();
97 int partner_size();
98 void partner_resize(int size);
99 void partner_add_and_fit(const DataSubset& subset);
100
101 DataBuffer partner_buf;
102 DataSubset partner_region;
103
104 int timestamp = -2;
105 int elm_size;
106 int elm_max_count;
107};
108
109struct Group;
110
111struct Member {
112 Member(fenix_member_entry_t& mentry, Group& group);
113
114 //Returns true if snapshot was found.
115 bool snapshot_delete(int timestamp);
116
117 void stage(const DataSubset& subset);
118
119 //Member::istore(v) copies local data and region.
120 tasks::Task<int> istore(const DataSubset& subset);
121 //Handles partner data and region
122 virtual tasks::Task<int> istore_impl(const DataSubset& subset) = 0;
123
124 //As istore(_impl)
125 tasks::Task<int> istorev(const DataSubset& subset);
126 virtual tasks::Task<int> istorev_impl(const DataSubset& subset) = 0;
127
128 //These call the async versions and wait on them.
129 int store(const DataSubset& subset) { return istore(subset).result(); }
130 int storev(const DataSubset& subset) { return istorev(subset).result(); }
131
132 //Restore all internal snapshot data
133 //Moves entries to align with the group's list of timestamps.
134 //Impl must actually restoring entry data
135 int restore();
136 virtual int restore_impl() = 0;
137
138 int lrestore(char* target, int max, int timestamp, DataSubset& subset);
139
140 void commit(int timestamp);
141
142 fenix_member_entry_t& mentry;
143 Group& group;
144 int id = mentry.memberid;
145 // entries to be initialized by inheritors
146 std::deque<Entry> entries;
147
148 DataBuffer& send_buf;
149 DataBuffer& recv_buf;
150};
151
152struct BuddyMember : public Member {
153 BuddyMember(fenix_member_entry_t& mentry, Group& group);
154 int restore_impl() override;
155 tasks::Task<int> istore_impl(const DataSubset& subset) override;
156 tasks::Task<int> istorev_impl(const DataSubset& subset) override;
157 tasks::Task<int> exch(
158 const DataSubset& subset, const DataSubset& partner_subset
159 );
160};
161
162struct ParityMember : public Member {
163 ParityMember(fenix_member_entry_t& mentry, Group& group);
164 int restore_impl() override;
165 tasks::Task<int> istore_impl(const DataSubset& subset) override;
166
167 tasks::Task<int> istorev_impl(const DataSubset& subset) override {
168 fatal_print("IMR mode 5 cannot storev");
169 co_return 0;
170 };
171};
172
173struct Group : public fenix_group_t {
174 Group(
175 int id, MPI_Comm comm, int timestart, int depth, int* policy, int* flag
176 );
177
178 int mode;
179 int rank_separation;
180 std::vector<int> partners;
181
182 MPI_Comm set_comm = MPI_COMM_NULL;
183 int set_size, set_rank;
184 static inline bool set_comm_revoke_callback = false;
185
186 std::map<int, std::shared_ptr<Member>> member_data;
187 std::deque<int> timestamps;
188
189 DataBuffer send_buf, recv_buf;
190
191 void sync_timestamps();
192 void build_set_comm();
193
194 //nullptr if member not found
195 Member* find_member(int member_id);
196
197 std::string str();
198
199 int group_delete() override;
200 int member_create(fenix_member_entry_t* mentry) override;
201 int member_delete(fenix_member_entry_t* mentry) override;
202 int get_redundant_policy(int* name, void* value, int* flag) override;
203
204 void member_stage(int member_id, const DataSubset& subset) override;
205
206 int member_store(int member_id, const DataSubset& subset) override;
207 int member_storev(int member_id, const DataSubset& subset) override;
208 int member_istore(
209 int member_id, const DataSubset& subset, Fenix_Request* request
210 ) override;
211 int member_istorev(
212 int member_id, const DataSubset& subset, Fenix_Request* request
213 ) override;
214
215 int commit() override;
216
217 int snapshot_delete(int timestamp) override;
218 int barrier() override;
219
220 int member_restore(
221 int member_id, void* buffer, int max, int timestamp, DataSubset& data_found
222 ) override;
223 int member_lrestore(
224 int member_id, void* buffer, int max, int timestamp, DataSubset& data_found
225 ) override;
226 int member_restore_from_rank(
227 int member_id, void* buffer, int max, int timestamp, int source_rank
228 ) override;
229
230 int member_get_attribute(
231 fenix_member_entry_t* member, int name, void* value, int* flag,
232 int sourcerank
233 ) override;
234 int member_set_attribute(
235 fenix_member_entry_t* member, int name, void* value, int* flag
236 ) override;
237
238 int get_number_of_snapshots(int* number_of_snapshots) override;
239 int get_snapshot_at_position(int position, int* timestamp) override;
240 std::vector<int> get_snapshots();
241
242 int reinit(int* flag) override;
243};
244
245} // namespace fenix::data::imr
246
247#endif //__FENIX_DATA_POLICY_IN_MEMORY_RAID_H__
Definition fenix_data_buffer.hpp:88
Definition task.hpp:16
[UNIMPLEMENTED] As MPI_Request, but for Fenix asynchronous data recove...
Definition fenix.h:570
Definition fenix_data_subset.hpp:137
Definition fenix_data_group.hpp:74
Definition fenix_data_member.hpp:70
Definition fenix_data_policy_in_memory_raid.hpp:152
Definition fenix_data_policy_in_memory_raid.hpp:73
Definition fenix_data_policy_in_memory_raid.hpp:173
Definition fenix_data_policy_in_memory_raid.hpp:111
Definition fenix_data_policy_in_memory_raid.hpp:162