Fenix @develop
 
Loading...
Searching...
No Matches
rank_log.h
1#ifndef RANK_LOG_H
2#define RANK_LOG_H
3#include <vector>
4
5#include <mpi.h>
6
7#include "fenix/logging/task.h"
8#include "fenix/logging/ops/send_log.h"
9#include "fenix/logging/ops/irecv_log.h"
10
11namespace fenix::logging {
// A pair of indices [first, next), printed in half-open notation by str().
// Both indices default to -1, which makes a default-constructed range invalid.
// NOTE(review): presumably these are message sequence numbers -- confirm
// against the send/recv logging code.
struct MsgRange {
  int first = -1;
  int next = -1;

  MsgRange() = default;

  // Builds an empty range that both starts and stops at m_first.
  explicit MsgRange(int m_first) : first(m_first), next(m_first) {}

  // A range is valid when it starts at a non-negative index and does not
  // stop before it starts.
  bool valid() const { return first >= 0 && next >= first; }

  // A range is empty when it is valid and both indices coincide.
  bool empty() const { return first >= 0 && next == first; }

  // A range is fresh when it is empty and anchored at index 0.
  bool fresh() const { return first == 0 && next == 0; }

  // Renders the range as "[first,next)".
  std::string str() const {
    std::string text = "[";
    text += std::to_string(first);
    text += ",";
    text += std::to_string(next);
    text += ")";
    return text;
  }
};
26
27struct Region {
28 int id = -1;
29 MsgRange send, recv;
30
31 // Valid regions have a non-negative ID and valid ranges
32 bool valid() const { return id >= 0 && send.valid() && recv.valid(); }
33 // Empty regions are valid with empty ranges
34 bool empty() const { return id >= 0 && send.empty() && recv.empty(); }
35 // Fresh regions are valid with fresh ranges
36 bool fresh() const { return id >= 0 && send.fresh() && recv.fresh(); }
37
38 Region() = default;
39 explicit Region(int m_id) : id(m_id) {}
40 Region(int m_id, int first_send, int first_recv)
41 : id(m_id), send(first_send), recv(first_recv) {}
42 Region(int m_id, const Region& o) : Region(m_id, o.send.next, o.recv.next) {
43 assert(m_id > o.id);
44 }
45
46 auto operator<=>(const Region& o) const { return id <=> o.id; }
47 auto operator<=>(const int& i) const { return id <=> i; }
48 auto operator==(const Region& o) const { return id == o.id; }
49 auto operator==(const int& i) const { return id == i; }
50 std::string str() const {
51 return "Region " + std::to_string(id) + " (send:" + send.str() +
52 ",recv:" + recv.str() + ")";
53 }
54};
55
56struct CommLog;
57
58struct RankLog {
59 RankLog(CommLog& m_comm_log, int m_rank);
60 RankLog(CommLog& m_comm_log, int m_rank, std::istream& i);
61 void serialize(std::ostream& o) const;
62
63 CommLog& comm_log;
64 const int rank;
65 TaskT task;
66
67 IrecvLog active_irecv;
68
69 void begin_region(int region_id);
70
71 // Called when the user resets consistency
72 void reset_consistency(int target_region);
73 // Called when a remote rank unexpectedly tries to form consistency with us
74 void reply_consistency();
75
76 void fenix_pre_recovery() { task = TaskT(); }
77 int send(const void* b, int n, MPI_Datatype d, int t);
78 int irecv(void* b, int n, MPI_Datatype d, int t, MPI_Request* r);
79 fenix::tasks::Status wait(MPI_Request* r);
80
81 std::string str() const;
82
83 private:
84 TaskT form_consistency();
85 void ensure_consistency();
86
87 const SendLog& log_send(const void* b, int n, MPI_Datatype d, int t);
88 void replay_messages();
89
90 void append_region(const Region& r);
91 void erase_sends(const Region& r);
92 void erase_regions(
93 std::vector<Region>::iterator begin, std::vector<Region>::iterator end
94 );
95
96 // Check if two valid region vectors are consistent
97 void check_consistent(std::vector<Region>& a, std::vector<Region>& b);
98 void recover_invalid(std::vector<Region>& a, std::vector<Region>& b);
99
100 std::set<SendLog, std::less<>> sends;
101 // Last successful send as of last recovery, according to remote
102 int already_sent = -1;
103
104 // initialized by constructor and never resized, i.e. used as a dynamic array
105 std::vector<Region> regions;
106
107 public:
108 Region& cur_region = regions.back();
109 int& next_send = cur_region.send.next;
110 int& next_recv = cur_region.recv.next;
111};
112} //namespace fenix::logging
113#endif
Definition send_log.h:12
Definition task.h:6
Definition mpi_util.hpp:115
Definition comm_log.h:37
Definition irecv_log.h:11
Definition rank_log.h:12
Definition rank_log.h:58
Definition rank_log.h:27