IOSS 2.0
Loading...
Searching...
No Matches
Ioss_ParallelUtils.h
Go to the documentation of this file.
1// Copyright(C) 1999-2024 National Technology & Engineering Solutions
2// of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
3// NTESS, the U.S. Government retains certain rights in this software.
4//
5// See packages/seacas/LICENSE for details
6
7#pragma once
8
9#include "Ioss_CodeTypes.h" // for Int64Vector, IntVector
10#include "Ioss_Utils.h"
11#include <cassert>
12#include <cstddef> // for size_t
13#include <stdint.h>
14#include <string> // for string
15#include <vector> // for vector
16
17#include "ioss_export.h"
18#if IOSS_DEBUG_OUTPUT
19#include <fmt/format.h>
20#include <fmt/ostream.h>
21#endif
22
23#ifdef SEACAS_HAVE_MPI
24#include "Ioss_SerializeIO.h"
25#endif
26
27namespace Ioss {
28 class PropertyManager;
29
30 class IOSS_EXPORT ParallelUtils
31 {
32 public:
33 ParallelUtils() = default;
34 explicit ParallelUtils(Ioss_MPI_Comm the_communicator);
35
36 enum MinMax { DO_MAX, DO_MIN, DO_SUM };
37
38#if defined(SEACAS_HAVE_MPI)
39 IOSS_NODISCARD static Ioss_MPI_Comm comm_world()
40 {
41 return (Ioss_MPI_Comm)MPI_COMM_WORLD; // CHECK: ALLOW MPI_COMM_WORLD
42 }
43 IOSS_NODISCARD static Ioss_MPI_Comm comm_self() { return (Ioss_MPI_Comm)MPI_COMM_SELF; }
44 IOSS_NODISCARD static Ioss_MPI_Comm comm_null() { return (Ioss_MPI_Comm)MPI_COMM_NULL; }
45#else
46 // NOTE: These values match those used in siMPI package.
47 IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_world() { return -100; }
48 IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_self() { return -100; }
49 IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_null() { return 0; }
50#endif
51
52 /*!
53 * See if any external properties specified via the
54 * IOSS_PROPERTIES environment variable. If any found, add to
55 * `properties`.
56 */
57 void add_environment_properties(Ioss::PropertyManager &properties);
58
59 /*!
60 * Returns 'true' if 'name' is defined in the environment.
61 * The value of the environment variable is returned in 'value'.
62 * getenv system call is only done on processor 0.
63 * If '!sync_parallel', then don't push to other processors.
64 */
65 bool get_environment(const std::string &name, std::string &value,
66 IOSS_MAYBE_UNUSED bool sync_parallel) const;
67
68 /*!
69 * Returns 'true' if 'name' is defined in the environment. The
70 * value of the environment variable is converted to an integer
71 * and returned in 'value'. No checking is done to ensure that
72 * the environment variable points to a valid integer. getenv
73 * system call is only done on processor 0. If '!sync_parallel',
74 * then don't push to other processors.
75 */
76 bool get_environment(const std::string &name, int &value,
77 IOSS_MAYBE_UNUSED bool sync_parallel) const;
78
79 /*!
80 * Returns 'true' if 'name' is defined in the environment no
81 * matter what the value. Returns false otherwise.
82 * getenv system call is only done on processor 0.
83 * If '!sync_parallel', then don't push to other processors.
84 */
85 IOSS_NODISCARD bool get_environment(const std::string &name,
86 IOSS_MAYBE_UNUSED bool sync_parallel) const;
87
88 IOSS_NODISCARD std::string decode_filename(const std::string &filename, bool is_parallel) const;
89
90 IOSS_NODISCARD Ioss_MPI_Comm communicator() const { return communicator_; }
91 IOSS_NODISCARD int parallel_size() const;
92 IOSS_NODISCARD int parallel_rank() const;
93
94 void barrier() const;
95
96 /*!
97 * Global OR of attribute strings, the processors which have no
98 * knowledge of the value should initialize to '0' and the
99 * processors with knowledge set the appropriate values.
100 */
101 void attribute_reduction(IOSS_MAYBE_UNUSED int length, IOSS_MAYBE_UNUSED char buffer[]) const;
102
103 /*!
104 * Generate a "globally unique id" which is unique over all entities
105 * of a specific type over all processors.
106 * Used by some applications for uniquely identifying an entity.
107 * If `rank` == -1, then use parallel_rank; otherwise use rank
108 */
109 IOSS_NODISCARD int64_t generate_guid(IOSS_MAYBE_UNUSED size_t id, int rank = -1) const;
110
111 /*! Return min, max, average memory used by any process */
112 void memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;
113
114 /*! Return high-water-mark min, max, average memory used by any process */
115 /* May be inaccurate unless system maintains this information */
116 void hwm_memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;
117
118 /*! Vector 'local_counts' contains the number of objects
119 * local to this processor. On exit, global_counts
120 * contains the total number of objects on all processors.
121 * Assumes that ordering is the same on all processors
122 */
123 void global_count(const IntVector &local_counts, IntVector &global_counts) const;
124 void global_count(const Int64Vector &local_counts, Int64Vector &global_counts) const;
125
126 template <typename T>
128 IOSS_MAYBE_UNUSED MinMax which) const;
129
130 template <typename T>
131 void global_array_minmax(IOSS_MAYBE_UNUSED std::vector<T> &local_minmax,
132 IOSS_MAYBE_UNUSED MinMax which) const;
133
134 template <typename T> void gather(T my_value, std::vector<T> &result) const;
135 template <typename T> void all_gather(T my_value, std::vector<T> &result) const;
136 template <typename T> void gather(std::vector<T> &my_values, std::vector<T> &result) const;
137 template <typename T> void all_gather(std::vector<T> &my_values, std::vector<T> &result) const;
138 template <typename T>
139 int gather(int vals_count, int size_per_val, std::vector<T> &my_values,
140 std::vector<T> &result) const;
141
142 template <typename T> void broadcast(T &my_value, int root = 0) const;
143 template <typename T> void broadcast(std::vector<T> &my_value, int root = 0) const;
144
145 void progress(const std::string &output) const;
146
147 private:
148 Ioss_MPI_Comm communicator_{comm_world()};
149 mutable int parallelSize_{-1};
150 mutable int parallelRank_{-1};
151 };
152
153#ifdef SEACAS_HAVE_MPI
  // Map a C++ arithmetic type to the corresponding MPI_Datatype.
  // Overload resolution on the (otherwise unused) dummy argument selects the
  // overload; callers write `mpi_type(T(0))` inside the templated
  // communication wrappers below.
  IOSS_NODISCARD inline MPI_Datatype mpi_type(double /*dummy*/) { return MPI_DOUBLE; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(float /*dummy*/) { return MPI_FLOAT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(int /*dummy*/) { return MPI_INT; }
  // NOTE(review): `long` is mapped to MPI_LONG_LONG_INT, which assumes a
  // 64-bit `long` (LP64).  On LLP64 platforms these widths differ -- confirm
  // this is intentional for the supported platforms.
  IOSS_NODISCARD inline MPI_Datatype mpi_type(long int /*dummy*/) { return MPI_LONG_LONG_INT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(long long int /*dummy*/) { return MPI_LONG_LONG_INT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned int /*dummy*/) { return MPI_UNSIGNED; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned long int /*dummy*/)
  {
    return MPI_UNSIGNED_LONG;
  }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned long long int /*dummy*/)
  {
    return MPI_UNSIGNED_LONG_LONG;
  }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(char /*dummy*/) { return MPI_CHAR; }
169
  /*!
   * All-to-all exchange for the case where the aggregate data volume exceeds
   * what plain `MPI_Alltoallv` (32-bit counts/displacements) can address.
   * Implemented as pairwise point-to-point exchanges: on iteration `i` each
   * rank exchanges with rank `i ^ my_processor`, iterating `i` over
   * [1, next-power-of-2-of-processor_count).  Per-peer counts must still fit
   * in an `int` (verified below); only the *totals* may be 64-bit.
   *
   * \returns 0 on success (errors abort via IOSS_ERROR).
   */
  template <typename T>
  int MY_Alltoallv64(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcounts,
                     const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
                     const std::vector<int64_t> &recvcounts, const std::vector<int64_t> &recvdisp,
                     Ioss_MPI_Comm comm)
  {
    int processor_count = 0;
    int my_processor = 0;
    MPI_Comm_size(comm, &processor_count);
    MPI_Comm_rank(comm, &my_processor);

    // Verify that all 'counts' can fit in an integer. Symmetric
    // communication, so recvcounts are sendcounts on another processor.
    for (int i = 0; i < processor_count; i++) {
      int snd_cnt = static_cast<int>(sendcounts[i]);
      if (static_cast<int64_t>(snd_cnt) != sendcounts[i]) {
        std::ostringstream errmsg;
        errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n"
               << "       processor " << my_processor << " to processor " << i << " is "
               << sendcounts[i]
               << "\n       which exceeds the storage capacity of the integers "
                  "used by MPI functions.\n";
        IOSS_ERROR(errmsg);
      }
    }

    size_t pow_2 = Ioss::Utils::power_2(processor_count);

    for (size_t i = 1; i < pow_2; i++) {
      MPI_Status status{};

      // XOR pairing guarantees that if A exchanges with B on iteration i,
      // then B also exchanges with A on that same iteration.
      size_t exchange_proc = i ^ my_processor;
      if (exchange_proc < static_cast<size_t>(processor_count)) {
        int snd_cnt = static_cast<int>(
            sendcounts[exchange_proc]); // Converts from int64_t to int as needed by mpi
        int rcv_cnt = static_cast<int>(recvcounts[exchange_proc]);
        int tag     = 24713;
        // Deadlock avoidance: the lower-ranked member of each pair sends
        // first and then receives; the higher-ranked member does the reverse.
        // Do not reorder these calls.
        if (static_cast<size_t>(my_processor) < exchange_proc) {
          MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
                   exchange_proc, tag, comm);
          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
                   comm, &status);
        }
        else {
          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
                   comm, &status);
          MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
                   exchange_proc, tag, comm);
        }
      }
    }

    // Take care of this processor's data movement...
    std::copy(&sendbuf[senddisp[my_processor]],
              &sendbuf[senddisp[my_processor] + sendcounts[my_processor]],
              &recvbuf[recvdisp[my_processor]]);
    return 0;
  }
228
  /*!
   * Alltoallv wrapper taking 64-bit counts/displacements.  Dispatches to
   * native `MPI_Alltoallv` when all offsets fit in 32 bits, otherwise to the
   * point-to-point `MY_Alltoallv64` fallback above.
   */
  template <typename T>
  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcnts,
                   const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
                   const std::vector<int64_t> &recvcnts, const std::vector<int64_t> &recvdisp,
                   Ioss_MPI_Comm comm)
  {
// Wrapper to handle case where send/recv counts and displacements are 64-bit integers.
// Two cases:
// 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range.
//    -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok
// 2) They are of type 64-bit integers, and storing data in the 64-bit integer range.
//    -- call special alltoallv which does point-to-point sends
#if IOSS_DEBUG_OUTPUT
    {
      // Debug-only: rank 0 reports each rank's total send volume.
      Ioss::ParallelUtils utils(comm);
      int                 processor_count = utils.parallel_size();

      int              max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
      std::vector<int> comm_size;

      utils.gather(max_comm, comm_size);
      int my_rank = utils.parallel_rank();
      if (my_rank == 0) {
        fmt::print("Send Communication Size: {}\n", fmt::join(comm_size, ", "));
      }
    }
#endif
#if 1
    int processor_count = 0;
    MPI_Comm_size(comm, &processor_count);
    // Last count + last displacement == total element span of the send buffer;
    // counts/displacements are assumed monotonically laid out.
    size_t max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
    size_t one      = 1;
    if (max_comm < one << 31) {
      // count and displacement data in range, need to copy to integer vector.
      std::vector<int> send_cnt(sendcnts.begin(), sendcnts.end());
      std::vector<int> send_dis(senddisp.begin(), senddisp.end());
      std::vector<int> recv_cnt(recvcnts.begin(), recvcnts.end());
      std::vector<int> recv_dis(recvdisp.begin(), recvdisp.end());
      return MPI_Alltoallv((void *)Data(sendbuf), Data(send_cnt), Data(send_dis), mpi_type(T(0)),
                           (void *)Data(recvbuf), Data(recv_cnt), Data(recv_dis), mpi_type(T(0)),
                           comm);
    }
    else {
#endif
      // Same as if each processor sent a message to every other process with:
      //   MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm);
      // And received a message from each processor with a call to:
      //   MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm);
      return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm);
#if 1
    }
#endif
  }
282
  /*!
   * Alltoallv wrapper taking 32-bit counts/displacements -- a thin shim over
   * `MPI_Alltoallv` that adds the optional debug report of send volumes.
   */
  template <typename T>
  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int> &sendcnts,
                   const std::vector<int> &senddisp, std::vector<T> &recvbuf,
                   const std::vector<int> &recvcnts, const std::vector<int> &recvdisp,
                   Ioss_MPI_Comm comm)
  {
#if IOSS_DEBUG_OUTPUT
    {
      // Debug-only: rank 0 reports each rank's total send volume.
      Ioss::ParallelUtils utils(comm);
      int                 processor_count = utils.parallel_size();

      int              max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
      std::vector<int> comm_size;

      utils.gather(max_comm, comm_size);
      int my_rank = utils.parallel_rank();
      if (my_rank == 0) {
        fmt::print("Send Communication Size: {}\n", fmt::join(comm_size, ", "));
      }
    }
#endif
    // const_casts accommodate MPI implementations whose MPI_Alltoallv
    // prototype predates the MPI-3 const-correct signatures; the buffers are
    // not modified through these pointers.
    return MPI_Alltoallv((void *)Data(sendbuf), const_cast<int *>(Data(sendcnts)),
                         const_cast<int *>(Data(senddisp)), mpi_type(T(0)), Data(recvbuf),
                         const_cast<int *>(Data(recvcnts)), const_cast<int *>(Data(recvdisp)),
                         mpi_type(T(0)), comm);
  }
309#endif
310
311 template <typename T>
312 void ParallelUtils::global_array_minmax(IOSS_MAYBE_UNUSED std::vector<T> &local_minmax,
313 IOSS_MAYBE_UNUSED MinMax which) const
314 {
315 IOSS_PAR_UNUSED(local_minmax);
316 IOSS_PAR_UNUSED(which);
317#ifdef SEACAS_HAVE_MPI
318 if (parallel_size() > 1 && !local_minmax.empty()) {
320 std::ostringstream errmsg;
321 errmsg << "Attempting mpi while in barrier owned by " << Ioss::SerializeIO::getOwner();
322 IOSS_ERROR(errmsg);
323 }
324
325 std::vector<T> maxout(local_minmax.size());
326 MPI_Op oper = MPI_MAX;
327 if (which == Ioss::ParallelUtils::DO_MAX) {
328 oper = MPI_MAX;
329 }
330 else if (which == Ioss::ParallelUtils::DO_MIN) {
331 oper = MPI_MIN;
332 }
333 else if (which == Ioss::ParallelUtils::DO_SUM) {
334 oper = MPI_SUM;
335 }
336
337 const int success =
338 MPI_Allreduce((void *)(Data(local_minmax)), Data(maxout),
339 static_cast<int>(local_minmax.size()), mpi_type(T()), oper, communicator_);
340 if (success != MPI_SUCCESS) {
341 std::ostringstream errmsg;
342 errmsg << "Ioss::ParallelUtils::global_array_minmax - MPI_Allreduce failed";
343 IOSS_ERROR(errmsg);
344 }
345 // Now copy back into passed in array...
346 for (size_t i = 0; i < local_minmax.size(); i++) {
347 local_minmax[i] = maxout[i];
348 }
349 }
350#endif
351 }
352
353} // namespace Ioss
#define IOSS_MAYBE_UNUSED
Definition Ioss_CodeTypes.h:53
#define IOSS_NODISCARD
Definition Ioss_CodeTypes.h:54
int Ioss_MPI_Comm
Definition Ioss_CodeTypes.h:63
#define IOSS_PAR_UNUSED(x)
Definition Ioss_CodeTypes.h:68
IOSS_NODISCARD constexpr T * Data(std::vector< T > &vec)
Definition Ioss_Utils.h:56
void IOSS_ERROR(const std::ostringstream &errmsg)
Definition Ioss_Utils.h:38
Definition Ioss_ParallelUtils.h:31
ParallelUtils()=default
void global_array_minmax(IOSS_MAYBE_UNUSED std::vector< T > &local_minmax, IOSS_MAYBE_UNUSED MinMax which) const
Definition Ioss_ParallelUtils.h:312
int gather(int vals_count, int size_per_val, std::vector< T > &my_values, std::vector< T > &result) const
void broadcast(std::vector< T > &my_value, int root=0) const
static IOSS_NODISCARD constexpr Ioss_MPI_Comm comm_self()
Definition Ioss_ParallelUtils.h:48
IOSS_NODISCARD int parallel_size() const
Definition Ioss_ParallelUtils.C:206
static IOSS_NODISCARD constexpr Ioss_MPI_Comm comm_null()
Definition Ioss_ParallelUtils.h:49
void broadcast(T &my_value, int root=0) const
static IOSS_NODISCARD constexpr Ioss_MPI_Comm comm_world()
Definition Ioss_ParallelUtils.h:47
IOSS_NODISCARD T global_minmax(IOSS_MAYBE_UNUSED T local_minmax, IOSS_MAYBE_UNUSED MinMax which) const
MinMax
Definition Ioss_ParallelUtils.h:36
@ DO_MAX
Definition Ioss_ParallelUtils.h:36
@ DO_MIN
Definition Ioss_ParallelUtils.h:36
@ DO_SUM
Definition Ioss_ParallelUtils.h:36
Ioss_MPI_Comm communicator_
Definition Ioss_ParallelUtils.h:148
IOSS_NODISCARD Ioss_MPI_Comm communicator() const
Definition Ioss_ParallelUtils.h:90
A collection of Ioss::Property objects.
Definition Ioss_PropertyManager.h:36
static IOSS_NODISCARD bool isEnabled()
Definition Ioss_SerializeIO.h:81
static IOSS_NODISCARD int getOwner()
Definition Ioss_SerializeIO.h:69
static IOSS_NODISCARD bool inBarrier()
Definition Ioss_SerializeIO.h:83
static IOSS_NODISCARD int power_2(int count)
Definition Ioss_Utils.h:273
The main namespace for the Ioss library.
Definition Ioad_DatabaseIO.C:40
std::vector< int64_t > Int64Vector
Definition Ioss_CodeTypes.h:22
std::vector< int > IntVector
Definition Ioss_CodeTypes.h:21