Ioss_ParallelUtils.h
// Copyright(C) 1999-2024 National Technology & Engineering Solutions
// of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
// NTESS, the U.S. Government retains certain rights in this software.
//
// See packages/seacas/LICENSE for details

#pragma once

#include "Ioss_CodeTypes.h" // for Int64Vector, IntVector
#include "Ioss_Utils.h"
#include <cassert>
#include <cstddef> // for size_t
#include <cstdint> // for int64_t
#include <string>  // for string
#include <vector>  // for vector

#include "ioss_export.h"
#if IOSS_DEBUG_OUTPUT
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <fmt/ranges.h>
#endif

#ifdef SEACAS_HAVE_MPI
#include "Ioss_SerializeIO.h"
#endif

namespace Ioss {
  class PropertyManager;

  class IOSS_EXPORT ParallelUtils
  {
  public:
    ParallelUtils() = default;
    explicit ParallelUtils(Ioss_MPI_Comm the_communicator);

    enum MinMax { DO_MAX, DO_MIN, DO_SUM };

#if defined(SEACAS_HAVE_MPI)
    IOSS_NODISCARD static Ioss_MPI_Comm comm_world()
    {
      return (Ioss_MPI_Comm)MPI_COMM_WORLD; // CHECK: ALLOW MPI_COMM_WORLD
    }
    IOSS_NODISCARD static Ioss_MPI_Comm comm_self() { return (Ioss_MPI_Comm)MPI_COMM_SELF; }
    IOSS_NODISCARD static Ioss_MPI_Comm comm_null() { return (Ioss_MPI_Comm)MPI_COMM_NULL; }
#else
    // NOTE: These values match those used in the siMPI package.
    IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_world() { return -100; }
    IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_self() { return -100; }
    IOSS_NODISCARD static constexpr Ioss_MPI_Comm comm_null() { return 0; }
#endif

    /*!
     * See if any external properties were specified via the
     * IOSS_PROPERTIES environment variable. If any are found, add
     * them to `properties`.
     */
    void add_environment_properties(Ioss::PropertyManager &properties);
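
    // Usage sketch (illustrative, not from this file): pick up run-time
    // configuration without code changes. `utils` is an assumed instance.
    //
    //   Ioss::PropertyManager properties;
    //   Ioss::ParallelUtils   utils(Ioss::ParallelUtils::comm_world());
    //   utils.add_environment_properties(properties);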

    /*!
     * Returns 'true' if 'name' is defined in the environment.
     * The value of the environment variable is returned in 'value'.
     * The getenv system call is done only on processor 0.
     * If `sync_parallel` is false, the value is not pushed to the
     * other processors.
     */
    bool get_environment(const std::string &name, std::string &value,
                         IOSS_MAYBE_UNUSED bool sync_parallel) const;

    /*!
     * Returns 'true' if 'name' is defined in the environment. The
     * value of the environment variable is converted to an integer
     * and returned in 'value'. No checking is done to ensure that
     * the environment variable points to a valid integer. The getenv
     * system call is done only on processor 0. If `sync_parallel` is
     * false, the value is not pushed to the other processors.
     */
    bool get_environment(const std::string &name, int &value,
                         IOSS_MAYBE_UNUSED bool sync_parallel) const;

    /*!
     * Returns 'true' if 'name' is defined in the environment, no
     * matter what the value; returns 'false' otherwise.
     * The getenv system call is done only on processor 0.
     * If `sync_parallel` is false, the result is not pushed to the
     * other processors.
     */
    IOSS_NODISCARD bool get_environment(const std::string &name,
                                        IOSS_MAYBE_UNUSED bool sync_parallel) const;
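
    // Usage sketch (illustrative): read an integer tuning knob consistently.
    // With sync_parallel == true, the value seen on processor 0 is pushed to
    // all ranks, so they agree. `MY_DEBUG_LEVEL` is a hypothetical variable.
    //
    //   int level = 0;
    //   if (utils.get_environment("MY_DEBUG_LEVEL", level, true)) {
    //     // every rank now holds the same `level`
    //   }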

    IOSS_NODISCARD std::string decode_filename(const std::string &filename, bool is_parallel) const;

    IOSS_NODISCARD Ioss_MPI_Comm communicator() const { return communicator_; }
    IOSS_NODISCARD int parallel_size() const;
    IOSS_NODISCARD int parallel_rank() const;

    void barrier() const;

    /*!
     * Global OR of attribute strings. Processors that have no
     * knowledge of the value should initialize the buffer to '0';
     * processors with knowledge set the appropriate values.
     */
    void attribute_reduction(IOSS_MAYBE_UNUSED int length, IOSS_MAYBE_UNUSED char buffer[]) const;
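
    // Usage sketch (illustrative): ranks without the attribute contribute a
    // zero-filled buffer; the OR reduction preserves the bytes set by the
    // rank(s) that know the value. `owner` and `attr` are hypothetical.
    //
    //   std::vector<char> buf(16, 0);
    //   if (utils.parallel_rank() == owner) {
    //     std::copy(attr.begin(), attr.end(), buf.begin());
    //   }
    //   utils.attribute_reduction(static_cast<int>(buf.size()), buf.data());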

    /*!
     * Generate a "globally unique id" which is unique over all entities
     * of a specific type over all processors.
     * Used by some applications for uniquely identifying an entity.
     * If `rank` == -1, then parallel_rank() is used; otherwise `rank` is used.
     */
    IOSS_NODISCARD int64_t generate_guid(IOSS_MAYBE_UNUSED size_t id, int rank = -1) const;
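
    // Usage sketch (illustrative): combine a locally unique id with the rank
    // so the result is unique across the whole run. `local_id` is hypothetical.
    //
    //   int64_t guid  = utils.generate_guid(local_id);     // uses parallel_rank()
    //   int64_t guid7 = utils.generate_guid(local_id, 7);  // explicit rank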

    /*! Return the minimum, maximum, and average memory used across all processes. */
    void memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;

    /*! Return the high-water-mark minimum, maximum, and average memory used
     *  across all processes. May be inaccurate unless the system maintains
     *  this information.
     */
    void hwm_memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;

    /*! Vector `local_counts` contains the number of objects
     *  local to this processor. On exit, `global_counts`
     *  contains the total number of objects over all processors.
     *  Assumes that the ordering is the same on all processors.
     */
    void global_count(const IntVector &local_counts, IntVector &global_counts) const;
    void global_count(const Int64Vector &local_counts, Int64Vector &global_counts) const;
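
    // Usage sketch (illustrative): element-wise sum across ranks. If rank 0
    // has {2, 5} and rank 1 has {3, 1}, both end up with {5, 6}.
    //
    //   Ioss::IntVector local{2, 5};
    //   Ioss::IntVector global;
    //   utils.global_count(local, global);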

    template <typename T>
    IOSS_NODISCARD T global_minmax(IOSS_MAYBE_UNUSED T local_minmax,
                                   IOSS_MAYBE_UNUSED MinMax which) const;

    template <typename T>
    void global_array_minmax(IOSS_MAYBE_UNUSED std::vector<T> &local_minmax,
                             IOSS_MAYBE_UNUSED MinMax which) const;
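
    // Usage sketch (illustrative): reduce a single scalar across all ranks.
    // `local_max` is a hypothetical locally computed value.
    //
    //   double global_max = utils.global_minmax(local_max, Ioss::ParallelUtils::DO_MAX);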

    template <typename T> void gather(T my_value, std::vector<T> &result) const;
    template <typename T> void all_gather(T my_value, std::vector<T> &result) const;
    template <typename T> void gather(std::vector<T> &my_values, std::vector<T> &result) const;
    template <typename T> void all_gather(std::vector<T> &my_values, std::vector<T> &result) const;
    template <typename T>
    int gather(int vals_count, int size_per_val, std::vector<T> &my_values,
               std::vector<T> &result) const;
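
    // Usage sketch (illustrative): `gather` fills `result` on rank 0 only;
    // `all_gather` fills it on every rank, one entry per rank.
    //
    //   std::vector<int> ranks;
    //   utils.all_gather(utils.parallel_rank(), ranks);  // ranks[i] == i everywhere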

    template <typename T> void broadcast(T &my_value, int root = 0) const;
    template <typename T> void broadcast(std::vector<T> &my_value, int root = 0) const;
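
    // Usage sketch (illustrative): the root's value overwrites all others.
    // `read_timestep()` is a hypothetical rank-0-only helper.
    //
    //   double dt = (utils.parallel_rank() == 0) ? read_timestep() : 0.0;
    //   utils.broadcast(dt);  // every rank now holds rank 0's `dt`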

    void progress(const std::string &output) const;

  private:
    Ioss_MPI_Comm communicator_{comm_world()};
    mutable int   parallelSize_{-1};
    mutable int   parallelRank_{-1};
  };

#ifdef SEACAS_HAVE_MPI
  IOSS_NODISCARD inline MPI_Datatype mpi_type(double /*dummy*/) { return MPI_DOUBLE; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(float /*dummy*/) { return MPI_FLOAT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(int /*dummy*/) { return MPI_INT; }
  // NOTE: `long` is mapped to MPI_LONG_LONG_INT; this assumes a 64-bit `long`.
  IOSS_NODISCARD inline MPI_Datatype mpi_type(long int /*dummy*/) { return MPI_LONG_LONG_INT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(long long int /*dummy*/) { return MPI_LONG_LONG_INT; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned int /*dummy*/) { return MPI_UNSIGNED; }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned long int /*dummy*/)
  {
    return MPI_UNSIGNED_LONG;
  }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(unsigned long long int /*dummy*/)
  {
    return MPI_UNSIGNED_LONG_LONG;
  }
  IOSS_NODISCARD inline MPI_Datatype mpi_type(char /*dummy*/) { return MPI_CHAR; }

  template <typename T>
  int MY_Alltoallv64(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcounts,
                     const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
                     const std::vector<int64_t> &recvcounts, const std::vector<int64_t> &recvdisp,
                     Ioss_MPI_Comm comm)
  {
    int processor_count = 0;
    int my_processor    = 0;
    MPI_Comm_size(comm, &processor_count);
    MPI_Comm_rank(comm, &my_processor);

    // Verify that all 'counts' can fit in an integer. Symmetric
    // communication, so recvcounts are sendcounts on another processor.
    for (int i = 0; i < processor_count; i++) {
      int snd_cnt = static_cast<int>(sendcounts[i]);
      if (static_cast<int64_t>(snd_cnt) != sendcounts[i]) {
        std::ostringstream errmsg;
        errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n"
               << "       processor " << my_processor << " to processor " << i << " is "
               << sendcounts[i]
               << "\n       which exceeds the storage capacity of the integers "
                  "used by MPI functions.\n";
        IOSS_ERROR(errmsg);
      }
    }

    // Point-to-point exchange: pair ranks via XOR over `power_2(processor_count)`
    // rounds so that in each round every rank talks to exactly one partner.
    size_t pow_2 = Ioss::Utils::power_2(processor_count);

    for (size_t i = 1; i < pow_2; i++) {
      MPI_Status status{};

      size_t exchange_proc = i ^ my_processor;
      if (exchange_proc < static_cast<size_t>(processor_count)) {
        int snd_cnt = static_cast<int>(
            sendcounts[exchange_proc]); // Converts from int64_t to int as needed by mpi
        int rcv_cnt = static_cast<int>(recvcounts[exchange_proc]);
        int tag     = 24713;
        // The lower-ranked partner sends first and the higher-ranked partner
        // receives first, which avoids a send/send deadlock.
        if (static_cast<size_t>(my_processor) < exchange_proc) {
          MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
                   exchange_proc, tag, comm);
          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
                   comm, &status);
        }
        else {
          MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
                   comm, &status);
          MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
                   exchange_proc, tag, comm);
        }
      }
    }

    // Take care of this processor's data movement...
    std::copy(&sendbuf[senddisp[my_processor]],
              &sendbuf[senddisp[my_processor] + sendcounts[my_processor]],
              &recvbuf[recvdisp[my_processor]]);
    return 0;
  }
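
  // Illustration (not from this file): with four ranks, the XOR pairing above
  // produces this schedule, so each pair of ranks exchanges exactly once:
  //   round i=1: 0<->1, 2<->3
  //   round i=2: 0<->2, 1<->3
  //   round i=3: 0<->3, 1<->2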

  template <typename T>
  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcnts,
                   const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
                   const std::vector<int64_t> &recvcnts, const std::vector<int64_t> &recvdisp,
                   Ioss_MPI_Comm comm)
  {
    // Wrapper to handle the case where send/recv counts and displacements are
    // 64-bit integers. Two cases:
    // 1) They are 64-bit integers, but the data they store fits in the 32-bit
    //    integer range: if sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, we are ok.
    // 2) They are 64-bit integers and store data in the 64-bit integer range:
    //    call the special alltoallv which does point-to-point sends.
#if IOSS_DEBUG_OUTPUT
    {
      Ioss::ParallelUtils utils(comm);
      int                 processor_count = utils.parallel_size();

      int              max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
      std::vector<int> comm_size;

      utils.gather(max_comm, comm_size);
      int my_rank = utils.parallel_rank();
      if (my_rank == 0) {
        fmt::print("Send Communication Size: {}\n", fmt::join(comm_size, ", "));
      }
    }
#endif
    int processor_count = 0;
    MPI_Comm_size(comm, &processor_count);
    size_t max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
    size_t one      = 1;
    if (max_comm < one << 31) {
      // Count and displacement data are in range; copy them to integer vectors.
      std::vector<int> send_cnt(sendcnts.begin(), sendcnts.end());
      std::vector<int> send_dis(senddisp.begin(), senddisp.end());
      std::vector<int> recv_cnt(recvcnts.begin(), recvcnts.end());
      std::vector<int> recv_dis(recvdisp.begin(), recvdisp.end());
      return MPI_Alltoallv((void *)Data(sendbuf), Data(send_cnt), Data(send_dis), mpi_type(T(0)),
                           (void *)Data(recvbuf), Data(recv_cnt), Data(recv_dis), mpi_type(T(0)),
                           comm);
    }
    else {
      // Same as if each processor sent a message to every other process with:
      //   MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype), sendcnts[i], sendtype, i, tag, comm);
      // and received a message from each processor with a call to:
      //   MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype), recvcnts[i], recvtype, i, tag, comm);
      return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm);
    }
  }
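
  // Usage sketch (illustrative): callers pass 64-bit counts/displacements and
  // the wrapper transparently falls back to MPI_Alltoallv when totals fit in
  // 32 bits. The buffers and count vectors here are assumed to be filled by
  // the caller, one entry per rank.
  //
  //   std::vector<double>  send, recv;                 // data buffers
  //   std::vector<int64_t> scnt, sdis, rcnt, rdis;     // per-rank counts/offsets
  //   Ioss::MY_Alltoallv(send, scnt, sdis, recv, rcnt, rdis, comm);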

  template <typename T>
  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int> &sendcnts,
                   const std::vector<int> &senddisp, std::vector<T> &recvbuf,
                   const std::vector<int> &recvcnts, const std::vector<int> &recvdisp,
                   Ioss_MPI_Comm comm)
  {
#if IOSS_DEBUG_OUTPUT
    {
      Ioss::ParallelUtils utils(comm);
      int                 processor_count = utils.parallel_size();

      int              max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
      std::vector<int> comm_size;

      utils.gather(max_comm, comm_size);
      int my_rank = utils.parallel_rank();
      if (my_rank == 0) {
        fmt::print("Send Communication Size: {}\n", fmt::join(comm_size, ", "));
      }
    }
#endif
    return MPI_Alltoallv((void *)Data(sendbuf), const_cast<int *>(Data(sendcnts)),
                         const_cast<int *>(Data(senddisp)), mpi_type(T(0)), Data(recvbuf),
                         const_cast<int *>(Data(recvcnts)), const_cast<int *>(Data(recvdisp)),
                         mpi_type(T(0)), comm);
  }
#endif

  template <typename T>
  void ParallelUtils::global_array_minmax(IOSS_MAYBE_UNUSED std::vector<T> &local_minmax,
                                          IOSS_MAYBE_UNUSED MinMax which) const
  {
    IOSS_PAR_UNUSED(local_minmax);
    IOSS_PAR_UNUSED(which);
#ifdef SEACAS_HAVE_MPI
    if (parallel_size() > 1 && !local_minmax.empty()) {
      if (Ioss::SerializeIO::isEnabled() && Ioss::SerializeIO::inBarrier()) {
        std::ostringstream errmsg;
        errmsg << "Attempting mpi while in barrier owned by " << Ioss::SerializeIO::getOwner();
        IOSS_ERROR(errmsg);
      }

      std::vector<T> maxout(local_minmax.size());
      MPI_Op         oper = MPI_MAX;
      if (which == Ioss::ParallelUtils::DO_MAX) {
        oper = MPI_MAX;
      }
      else if (which == Ioss::ParallelUtils::DO_MIN) {
        oper = MPI_MIN;
      }
      else if (which == Ioss::ParallelUtils::DO_SUM) {
        oper = MPI_SUM;
      }

      const int success =
          MPI_Allreduce((void *)(Data(local_minmax)), Data(maxout),
                        static_cast<int>(local_minmax.size()), mpi_type(T()), oper, communicator_);
      if (success != MPI_SUCCESS) {
        std::ostringstream errmsg;
        errmsg << "Ioss::ParallelUtils::global_array_minmax - MPI_Allreduce failed";
        IOSS_ERROR(errmsg);
      }
      // Now copy the result back into the passed-in array...
      for (size_t i = 0; i < local_minmax.size(); i++) {
        local_minmax[i] = maxout[i];
      }
    }
#endif
  }
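
  // Usage sketch (illustrative): in-place element-wise reduction over ranks.
  // `bounds` here is a hypothetical per-rank vector of local minima.
  //
  //   std::vector<double> bounds{local_min_x, local_min_y, local_min_z};
  //   utils.global_array_minmax(bounds, Ioss::ParallelUtils::DO_MIN);
  //   // `bounds` now holds the global minima on every rank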

} // namespace Ioss