cs_parall.h
#ifndef __CS_PARALL_H__
#define __CS_PARALL_H__

/*============================================================================
 * Functions dealing with parallelism
 *============================================================================*/

/*
  This file is part of code_saturne, a general-purpose CFD tool.

  Copyright (C) 1998-2024 EDF S.A.

  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
  Foundation; either version 2 of the License, or (at your option) any later
  version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
  Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/*----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
 * Local headers
 *----------------------------------------------------------------------------*/

#include "cs_defs.h"
#include "cs_execution_context.h"

/*----------------------------------------------------------------------------*/

BEGIN_C_DECLS

/*============================================================================
 * General types and macros used throughout code_saturne
 *============================================================================*/

/*----------------------------------------------------------------------------
 * Variable value type.
 *----------------------------------------------------------------------------*/

/*! Algorithm types for indexed (element-to-node) sums */

typedef enum {

  CS_E2N_SUM_SCATTER,         /*!< sum using a scatter (direct add) approach */
  CS_E2N_SUM_SCATTER_ATOMIC,  /*!< sum using a scatter approach with atomic adds */
  CS_E2N_SUM_GATHER           /*!< sum using a gather approach */

} cs_e2n_sum_t;

/*============================================================================
 * Global variables
 *============================================================================*/

/* Preferred indexed sum option, adapted to shared-memory parallelism */

extern cs_e2n_sum_t cs_glob_e2n_sum_type;

/*=============================================================================
 * Public function prototypes
 *============================================================================*/

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum values of a counter on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter(cs_gnum_t  cpt[],
                  const int  n)
{
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
                  cs_glob_mpi_comm);
  }
}

#else

void
cs_parall_counter(cs_gnum_t  cpt[],
                  const int  n);

#endif

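/*
  Example (illustrative sketch, not from the original header): summing a
  local event count over all ranks; "n_clipped" is a hypothetical variable.

    cs_gnum_t n_clipped = 12;          // local count on this rank
    cs_parall_counter(&n_clipped, 1);
    // n_clipped now holds the global sum on every rank
*/
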
/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum values of a counter on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter_max(cs_lnum_t  cpt[],
                      const int  n)
{
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
                  cs_glob_mpi_comm);
  }
}

#else

void
cs_parall_counter_max(cs_lnum_t  cpt[],
                      const int  n);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum values of a given datatype on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_sum(int            n,
              cs_datatype_t  datatype,
              void          *val)
{
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_SUM,
                  cs_glob_mpi_comm);
  }
}

#else

void
cs_parall_sum(int            n,
              cs_datatype_t  datatype,
              void          *val);

#endif

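/*
  Example (illustrative sketch): reducing two local double-precision values
  in one call; CS_DOUBLE is assumed to be the matching cs_datatype_t value
  from cs_defs.h, and the summed quantities are hypothetical.

    cs_real_t vals[2] = {cell_volume_sum, cell_mass_sum};
    cs_parall_sum(2, CS_DOUBLE, vals);   // vals now holds the global sums
*/
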
/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum values of a given datatype on all default communicator
 *        processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_max(int            n,
              cs_datatype_t  datatype,
              void          *val)
{
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MAX,
                  cs_glob_mpi_comm);
  }
}

#else

void
cs_parall_max(int            n,
              cs_datatype_t  datatype,
              void          *val);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum values of a given datatype on all default communicator
 *        processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_min(int            n,
              cs_datatype_t  datatype,
              void          *val)
{
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MIN,
                  cs_glob_mpi_comm);
  }
}

#else

void
cs_parall_min(int            n,
              cs_datatype_t  datatype,
              void          *val);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Broadcast values of a given datatype to all default communicator
 *        processes.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI)

inline static void
cs_parall_bcast(int            root_rank,
                int            n,
                cs_datatype_t  datatype,
                void          *val)
{
  if (cs_glob_n_ranks > 1)
    MPI_Bcast(val, n, cs_datatype_to_mpi[datatype], root_rank,
              cs_glob_mpi_comm);
}

#else

#define cs_parall_bcast(_root_rank, _n, _datatype, _val);

#endif

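/*
  Example (illustrative sketch): broadcasting a value known on rank 0 only;
  CS_INT_TYPE is assumed to match a plain int here, and read_zone_count()
  is a hypothetical helper.

    int n_zones = 0;
    if (cs_glob_rank_id < 1)
      n_zones = read_zone_count();
    cs_parall_bcast(0, 1, CS_INT_TYPE, &n_zones);
*/
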
/*----------------------------------------------------------------------------*/
/*!
 * \brief Build a global array from each local array in each domain.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_allgather_r(int        n_elts,
                      int        n_g_elts,
                      cs_real_t  array[],
                      cs_real_t  g_array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Build an ordered global array from each local array in each domain.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_allgather_ordered_r(int        n_elts,
                              int        n_g_elts,
                              int        stride,
                              cs_real_t  o_key[],
                              cs_real_t  array[],
                              cs_real_t  g_array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Build a global array on the given root rank from all local arrays.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_gather_r(int              root_rank,
                   int              n_elts,
                   int              n_g_elts,
                   const cs_real_t  array[],
                   cs_real_t        g_array[]);

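/*
  Example (illustrative sketch): gathering per-rank probe values on rank 0;
  n_probes, n_g_probes, vals, and g_vals are hypothetical, with g_vals
  dimensioned to n_g_probes on the root rank.

    cs_parall_gather_r(0, n_probes, n_g_probes, vals, g_vals);
*/
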
/*----------------------------------------------------------------------------*/
/*!
 * \brief Build an ordered global array on the given root rank from all
 *        local arrays.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_gather_ordered_r(int        root_rank,
                           int        n_elts,
                           int        n_g_elts,
                           int        stride,
                           cs_real_t  o_key[],
                           cs_real_t  array[],
                           cs_real_t  g_array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Distribute a global array from a given root rank over all ranks.
 *        Each rank receives the part related to its local elements.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_scatter_r(int              root_rank,
                    int              n_elts,
                    int              n_g_elts,
                    const cs_real_t  g_array[],
                    cs_real_t        array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Build a global array on the given root rank from all local arrays.
 *        Function dealing with single-precision arrays.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_gather_f(int          root_rank,
                   int          n_elts,
                   int          n_g_elts,
                   const float  array[],
                   float        g_array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Distribute a global array from a given root rank over all ranks.
 *        Function dealing with single-precision arrays.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_scatter_f(int          root_rank,
                    int          n_elts,
                    int          n_g_elts,
                    const float  g_array[],
                    float        array[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum value of a real and the value of related array on all
 *        default communicator processes.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_max_loc_vals(int         n,
                       cs_real_t  *max,
                       cs_real_t   max_loc_vals[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum value of a real and the value of related array on all
 *        default communicator processes.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_min_loc_vals(int         n,
                       cs_real_t  *min,
                       cs_real_t   min_loc_vals[]);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Given an (id, rank, value) tuple, return the local id and rank
 *        corresponding to the global minimum value.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_min_id_rank_r(cs_lnum_t  *elt_id,
                        int        *rank_id,
                        cs_real_t   val);

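/*
  Example (illustrative sketch): locating the rank and local id of the
  globally smallest time step; local_dt_min and local_min_cell_id are
  hypothetical, and rank_id is assumed to be set by the call.

    cs_lnum_t elt_id = local_min_cell_id;
    int       rank_id;
    cs_parall_min_id_rank_r(&elt_id, &rank_id, local_dt_min);
    // elt_id is now the id, local to rank_id, holding the global minimum
*/
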
/*----------------------------------------------------------------------------*/
/*!
 * \brief Return minimum recommended scatter or gather buffer size.
 */
/*----------------------------------------------------------------------------*/

size_t
cs_parall_get_min_coll_buf_size(void);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Define minimum recommended scatter or gather buffer size.
 */
/*----------------------------------------------------------------------------*/

void
cs_parall_set_min_coll_buf_size(size_t  buffer_size);

/*----------------------------------------------------------------------------*/
/*!
 * \brief Compute recommended number of threads for a section.
 */
/*----------------------------------------------------------------------------*/

inline static int
cs_parall_n_threads(cs_lnum_t  n_elements,
                    cs_lnum_t  min_thread_elements)
{
#if defined(HAVE_OPENMP)
  int n_t = omp_get_max_threads();
  int n_t_l = n_elements / min_thread_elements;
  if (n_t_l < n_t)
    n_t = n_t_l;
  if (n_t < 1)
    n_t = 1;
  return n_t;
#else
  CS_UNUSED(n_elements);  /* avoid compiler warning */
  CS_UNUSED(min_thread_elements);
  return 1;
#endif
}

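/*
  Example (illustrative sketch): capping the thread count for a small loop;
  CS_THR_MIN (from cs_defs.h) is assumed as the minimum chunk size per thread.

    #pragma omp parallel for num_threads(cs_parall_n_threads(n, CS_THR_MIN))
    for (cs_lnum_t i = 0; i < n; i++)
      y[i] += a*x[i];
*/
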
/*----------------------------------------------------------------------------*/
/*!
 * \brief Compute array index bounds for a local thread.
 *
 * When called inside an OpenMP parallel section, this returns the start
 * and past-the-end indexes for the current thread.
 */
/*----------------------------------------------------------------------------*/

inline static void
cs_parall_thread_range(cs_lnum_t   n,
                       size_t      type_size,
                       cs_lnum_t  *s_id,
                       cs_lnum_t  *e_id)
{
#if defined(HAVE_OPENMP)
  const int t_id = omp_get_thread_num();
  const int n_t = omp_get_num_threads();
  const cs_lnum_t t_n = (n + n_t - 1) / n_t;
  const cs_lnum_t cl_m = CS_CL_SIZE / type_size;  /* Cache line multiple */

  *s_id =  t_id    * t_n;
  *e_id = (t_id+1) * t_n;
  *s_id = cs_align(*s_id, cl_m);
  *e_id = cs_align(*e_id, cl_m);
  if (*e_id > n) *e_id = n;
#else
  CS_UNUSED(type_size);  /* avoid compiler warning */
  *s_id = 0;
  *e_id = n;
#endif
}

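/*
  Example (illustrative sketch): cache-line-aligned partitioning of an
  array of cs_real_t values over the current OpenMP team.

    #pragma omp parallel
    {
      cs_lnum_t s_id, e_id;
      cs_parall_thread_range(n, sizeof(cs_real_t), &s_id, &e_id);
      for (cs_lnum_t i = s_id; i < e_id; i++)
        x[i] = 0.;
    }
*/
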
/*----------------------------------------------------------------------------*/
/*!
 * \brief Compute array index bounds for a local thread for upper triangular
 *        matrix elements.
 */
/*----------------------------------------------------------------------------*/

inline static void
cs_parall_thread_range_upper(cs_lnum_t   n,
                             size_t      type_size,
                             cs_lnum_t  *s_id,
                             cs_lnum_t  *e_id)
{
#if defined(HAVE_OPENMP)
  const int t_id = omp_get_thread_num();
  const int n_t = omp_get_num_threads();
  const cs_lnum_t cl_m = CS_CL_SIZE / type_size;  /* Cache line multiple */

  double r0 = (double)t_id / (double)n_t;
  double r1 = (double)(t_id+1) / (double)n_t;

  r0 = r0*r0;
  r1 = r1*r1;

  const cs_lnum_t t_0 = r0*n;
  const cs_lnum_t t_1 = r1*n;

  *s_id = t_0;
  *e_id = t_1;
  *s_id = cs_align(*s_id, cl_m);
  *e_id = cs_align(*e_id, cl_m);
  if (*e_id > n) *e_id = n;
#else
  CS_UNUSED(type_size);  /* avoid compiler warning */
  *s_id = 0;
  *e_id = n;
#endif
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Compute number of blocks needed for a given array and block sizes.
 */
/*----------------------------------------------------------------------------*/

static inline size_t
cs_parall_block_count(size_t  n,
                      size_t  block_size)
{
  return (n % block_size) ? n/block_size + 1 : n/block_size;
}

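/*
  Example (illustrative sketch): the count rounds up, so 1000 elements in
  blocks of 256 need 4 blocks (3*256 = 768 < 1000 <= 4*256 = 1024).

    size_t n_blocks = cs_parall_block_count(1000, 256);   // n_blocks == 4
*/
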
/*----------------------------------------------------------------------------*/

END_C_DECLS

#if defined(__cplusplus)

/*=============================================================================
 * Public C++ functions
 *============================================================================*/

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum values of a counter on all processes associated with the given
 *        execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter(const cs_execution_context  *ec,
                  cs_gnum_t                    cpt[],
                  const int                    n)
{
  if (ec->use_mpi()) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
                  ec->comm());
  }
}

#else

void
cs_parall_counter(const cs_execution_context  *ec,
                  cs_gnum_t                    cpt[],
                  const int                    n);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum values of a counter on all processes associated with the
 *        given execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter_max(const cs_execution_context  *ec,
                      cs_lnum_t                    cpt[],
                      const int                    n)
{
  if (ec->use_mpi()) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
                  ec->comm());
  }
}

#else

void
cs_parall_counter_max(const cs_execution_context  *ec,
                      cs_lnum_t                    cpt[],
                      const int                    n);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum values of a given datatype on all processes associated with
 *        the given execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_sum(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val)
{
  if (ec->use_mpi()) {
    MPI_Allreduce(MPI_IN_PLACE, val, n,
                  cs_datatype_to_mpi[datatype], MPI_SUM,
                  ec->comm());
  }
}

#else

void
cs_parall_sum(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val);

#endif

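/*
  Example (illustrative sketch): reduction over the communicator of an
  execution context; cs_execution_context_glob_get() is assumed to be
  available from cs_execution_context.h, and CS_DOUBLE to be the matching
  cs_datatype_t value.

    const cs_execution_context *ec = cs_execution_context_glob_get();
    cs_real_t v = local_contribution;   // hypothetical local value
    cs_parall_sum(ec, 1, CS_DOUBLE, &v);
*/
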
/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum values of a given datatype on all processes associated
 *        with the given execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_max(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val)
{
  if (ec->use_mpi()) {
    MPI_Allreduce(MPI_IN_PLACE, val, n,
                  cs_datatype_to_mpi[datatype], MPI_MAX,
                  ec->comm());
  }
}

#else

void
cs_parall_max(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum values of a given datatype on all processes associated
 *        with the given execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_min(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val)
{
  if (ec->use_mpi()) {
    MPI_Allreduce(MPI_IN_PLACE, val, n,
                  cs_datatype_to_mpi[datatype], MPI_MIN,
                  ec->comm());
  }
}

#else

void
cs_parall_min(const cs_execution_context  *ec,
              int                          n,
              cs_datatype_t                datatype,
              void                        *val);

#endif

/*----------------------------------------------------------------------------*/
/*!
 * \brief Compute array index bounds for a local thread, given an explicit
 *        thread id and number of threads.
 */
/*----------------------------------------------------------------------------*/

inline static void
cs_parall_thread_range(cs_lnum_t   n,
                       size_t      type_size,
                       int         t_id,
                       int         n_t,
                       cs_lnum_t  *s_id,
                       cs_lnum_t  *e_id)
{
  const cs_lnum_t t_n = (n + n_t - 1) / n_t;
  const cs_lnum_t cl_m = CS_CL_SIZE / type_size;  /* Cache line multiple */

  *s_id =  t_id    * t_n;
  *e_id = (t_id+1) * t_n;
  *s_id = cs_align(*s_id, cl_m);
  *e_id = cs_align(*e_id, cl_m);
  if (*s_id > n) *s_id = n;
  if (*e_id > n) *e_id = n;
}

/*=============================================================================
 * Public C++ templates
 *============================================================================*/

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum scalar values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_sum_scalars
(
  T&        first,
  Vals&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel sum */
  if (n_vals == 0)
    cs_parall_sum(1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i+1] = *(_values[i]);

    cs_parall_sum(n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i+1];
  }

#endif // defined(HAVE_MPI)
}

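/*
  Example (illustrative sketch): the variadic template packs all scalars of
  a same type into a single reduction, so one MPI_Allreduce replaces three.

    cs_real_t a = wa, b = wb, c = wc;   // hypothetical local values
    cs_parall_sum_scalars(a, b, c);     // a, b, c now hold global sums
*/
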
/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum scalar values on all processes associated with the given
 *        execution context.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_sum_scalars
(
  const cs_execution_context  *ec,
  T&                           first,
  Vals&...                     values
)
{
#if defined(HAVE_MPI)

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel sum */
  if (n_vals == 0)
    cs_parall_sum(ec, 1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i+1] = *(_values[i]);

    cs_parall_sum(ec, n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i+1];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum strided values on all processes associated with the given
 *        execution context.
 */
/*----------------------------------------------------------------------------*/

template <int Stride, typename T, typename... Vals>
static void
cs_parall_sum_strided
(
  const cs_execution_context  *ec,
  T                            first[],
  Vals&&...                    values
)
{
#if defined(HAVE_MPI)

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel sum */
  if (n_vals == 0)
    cs_parall_sum(ec, Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_sum(ec, work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++) {
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
    }
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Sum strided values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <int Stride, typename T, typename... Vals>
static void
cs_parall_sum_strided
(
  T          first[],
  Vals&&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel sum */
  if (n_vals == 0)
    cs_parall_sum(Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_sum(work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++) {
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
    }
  }

#endif // defined(HAVE_MPI)
}

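/*
  Example (illustrative sketch): summing two 3-component vectors in one
  reduction; the stride is passed as a template argument and u, v are
  hypothetical local arrays.

    cs_real_t u[3], v[3];
    cs_parall_sum_strided<3>(u, v);   // u and v now hold global sums
*/
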
/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum of scalar values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_max_scalars
(
  T&        first,
  Vals&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel maximum */
  if (n_vals == 0)
    cs_parall_max(1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i+1] = *(_values[i]);

    cs_parall_max(n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i+1];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum of scalar values on all processes associated with the
 *        given execution context.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_max_scalars
(
  const cs_execution_context  *ec,
  T&                           first,
  Vals&...                     values
)
{
#if defined(HAVE_MPI)

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel maximum */
  if (n_vals == 0)
    cs_parall_max(ec, 1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i+1] = *(_values[i]);

    cs_parall_max(ec, n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i+1];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum of strided values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <int Stride, typename T, typename... Vals>
static void
cs_parall_max_strided
(
  T          first[],
  Vals&&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel maximum */
  if (n_vals == 0)
    cs_parall_max(Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_max(work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Maximum of strided values on all processes associated with the
 *        given execution context.
 */
/*----------------------------------------------------------------------------*/

template <int Stride, typename T, typename... Vals>
static void
cs_parall_max_strided
(
  const cs_execution_context  *ec,
  T                            first[],
  Vals&&...                    values
)
{
#if defined(HAVE_MPI)

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel maximum */
  if (n_vals == 0)
    cs_parall_max(ec, Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_max(ec, work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum of scalar values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_min_scalars
(
  T&        first,
  Vals&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel minimum */
  if (n_vals == 0)
    cs_parall_min(1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i + 1] = *(_values[i]);

    cs_parall_min(n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i + 1];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum of scalar values on all processes associated with the
 *        given execution context.
 */
/*----------------------------------------------------------------------------*/

template <typename T, typename... Vals>
static void
cs_parall_min_scalars
(
  const cs_execution_context  *ec,
  T&                           first,
  Vals&...                     values
)
{
#if defined(HAVE_MPI)

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel minimum */
  if (n_vals == 0)
    cs_parall_min(ec, 1, datatype, &first);
  else {
    /* Unpack values */
    T *_values[] = {&values ...};

    T w[n_vals + 1];
    w[0] = first;
    for (size_t i = 0; i < n_vals; i++)
      w[i + 1] = *(_values[i]);

    cs_parall_min(ec, n_vals + 1, datatype, w);

    first = w[0];
    for (size_t i = 0; i < n_vals; i++)
      *(_values[i]) = w[i + 1];
  }

#endif // defined(HAVE_MPI)
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum of strided values on all default communicator processes.
 */
/*----------------------------------------------------------------------------*/

template <int Stride, typename T, typename... Vals>
static void
cs_parall_min_strided
(
  T          first[],
  Vals&&...  values
)
{
#if defined(HAVE_MPI)

  if (cs_glob_n_ranks == 1)
    return;

  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel minimum */
  if (n_vals == 0)
    cs_parall_min(Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_min(work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
  }

#endif
}

/*----------------------------------------------------------------------------*/
/*!
 * \brief Minimum of strided values on all processes associated with the
 *        given execution context.
 */
/*----------------------------------------------------------------------------*/

#if defined(HAVE_MPI)

template <int Stride, typename T, typename... Vals>
static void
cs_parall_min_strided
(
  const cs_execution_context  *ec,
  T                            first[],
  Vals&&...                    values
)
{
  /* Count number of values */
  constexpr size_t n_vals = sizeof...(Vals);

  /* Set datatype for global communication */
  cs_datatype_t datatype = cs_datatype_from_type<T>();

  /* Temporary work array and parallel minimum */
  if (n_vals == 0)
    cs_parall_min(ec, Stride, datatype, first);
  else {
    /* Unpack values */
    T *_values[] = {values ...};

    constexpr size_t work_size = (n_vals + 1) * Stride;

    T w[work_size];
    for (int i = 0; i < Stride; i++)
      w[i] = first[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        w[(i+1)*Stride + j] = _values[i][j];

    cs_parall_min(ec, work_size, datatype, w);

    for (int i = 0; i < Stride; i++)
      first[i] = w[i];

    for (size_t i = 0; i < n_vals; i++)
      for (int j = 0; j < Stride; j++)
        _values[i][j] = w[(i+1)*Stride + j];
  }
}

#endif // defined(HAVE_MPI)

#endif // __cplusplus

/*----------------------------------------------------------------------------*/

#endif /* __CS_PARALL_H__ */