85#if defined(HAVE_MPI_IN_PLACE)
92 MPI_Allreduce(MPI_IN_PLACE, cpt, n,
CS_MPI_GNUM, MPI_SUM,
114#if defined(HAVE_MPI_IN_PLACE)
121 MPI_Allreduce(MPI_IN_PLACE, cpt, n,
CS_MPI_LNUM, MPI_MAX,
144#if defined(HAVE_MPI_IN_PLACE)
177#if defined(HAVE_MPI_IN_PLACE)
210#if defined(HAVE_MPI_IN_PLACE)
260#define cs_parall_bcast(_root_rank, _n, _datatype, _val);
458 const float g_array[],
558#if defined(HAVE_OPENMP)
559 int n_t = omp_get_max_threads();
560 int n_t_l = n_elements / min_thread_elements;
594#if defined(HAVE_OPENMP)
595 const int t_id = omp_get_thread_num();
596 const int n_t = omp_get_num_threads();
597 const cs_lnum_t t_n = (n + n_t - 1) / n_t;
601 *e_id = (t_id+1) * t_n;
604 if (*e_id > n) *e_id = n;
639#if defined(HAVE_OPENMP)
640 const int t_id = omp_get_thread_num();
641 const double n_t = omp_get_num_threads();
644 double r0 = (double)t_id / (
double)n_t;
645 double r1 = (double)(t_id+1) / (double)n_t;
657 if (*e_id > n) *e_id = n;
680 return (n % block_size) ? n/block_size + 1 : n/block_size;
687#if defined(__cplusplus)
703#if defined(HAVE_MPI_IN_PLACE)
711 MPI_Allreduce(MPI_IN_PLACE, cpt, n,
CS_MPI_GNUM, MPI_SUM,
735#if defined(HAVE_MPI_IN_PLACE)
743 MPI_Allreduce(MPI_IN_PLACE, cpt, n,
CS_MPI_LNUM, MPI_MAX,
768#if defined(HAVE_MPI_IN_PLACE)
777 MPI_Allreduce(MPI_IN_PLACE, val, n,
805#if defined(HAVE_MPI_IN_PLACE)
814 MPI_Allreduce(MPI_IN_PLACE, val, n,
842#if defined(HAVE_MPI_IN_PLACE)
851 MPI_Allreduce(MPI_IN_PLACE, val, n,
892 const cs_lnum_t t_n = (n + n_t - 1) / n_t;
896 *e_id = (t_id+1) * t_n;
899 if (*s_id > n) *s_id = n;
900 if (*e_id > n) *e_id = n;
915template <
typename T,
typename... Vals>
929 constexpr size_t n_vals =
sizeof...(Vals);
939 T *_values[] = {&values ...};
943 for (
size_t i = 0; i < n_vals; i++)
944 w[i+1] = *(_values[i]);
949 for (
size_t i = 0; i < n_vals; i++)
950 *(_values[i]) = w[i+1];
964template <
typename T,
typename... Vals>
976 constexpr size_t n_vals =
sizeof...(Vals);
986 T *_values[] = {&values ...};
990 for (
size_t i = 0; i < n_vals; i++)
991 w[i+1] = *(_values[i]);
996 for (
size_t i = 0; i < n_vals; i++)
997 *(_values[i]) = w[i+1];
1012template <
int Stride,
typename T,
typename... Vals>
1014cs_parall_sum_strided
1021#if defined(HAVE_MPI)
1024 constexpr size_t n_vals =
sizeof...(Vals);
1034 T *_values[] = {values ...};
1036 constexpr size_t work_size = (n_vals + 1) * Stride;
1039 for (
int i = 0; i < Stride; i++)
1042 for (
size_t i = 0; i < n_vals; i++)
1043 for (
int j = 0; j < Stride; j++)
1044 w[(i+1)*Stride + j] = _values[i][j];
1048 for (
int i = 0; i < Stride; i++)
1051 for (
size_t i = 0; i < n_vals; i++) {
1052 for (
int j = 0; j < Stride; j++)
1053 _values[i][j] = w[(i+1)*Stride + j];
1070template <
int Stride,
typename T,
typename... Vals>
1072cs_parall_sum_strided
1078#if defined(HAVE_MPI)
1084 constexpr size_t n_vals =
sizeof...(Vals);
1094 T *_values[] = {values ...};
1096 constexpr size_t work_size = (n_vals + 1) * Stride;
1099 for (
int i = 0; i < Stride; i++)
1102 for (
size_t i = 0; i < n_vals; i++)
1103 for (
int j = 0; j < Stride; j++)
1104 w[(i+1)*Stride + j] = _values[i][j];
1108 for (
int i = 0; i < Stride; i++)
1111 for (
size_t i = 0; i < n_vals; i++) {
1112 for (
int j = 0; j < Stride; j++)
1113 _values[i][j] = w[(i+1)*Stride + j];
1129template <
typename T,
typename... Vals>
1131cs_parall_max_scalars
1137#if defined(HAVE_MPI)
1143 constexpr size_t n_vals =
sizeof...(Vals);
1154 T *_values[] = {&values ...};
1158 for (
size_t i = 0; i < n_vals; i++)
1159 w[i+1] = *(_values[i]);
1164 for (
size_t i = 0; i < n_vals; i++)
1165 *(_values[i]) = w[i+1];
1179template <
typename T,
typename... Vals>
1181cs_parall_max_scalars
1188#if defined(HAVE_MPI)
1191 constexpr size_t n_vals =
sizeof...(Vals);
1202 T *_values[] = {&values ...};
1206 for (
size_t i = 0; i < n_vals; i++)
1207 w[i+1] = *(_values[i]);
1212 for (
size_t i = 0; i < n_vals; i++)
1213 *(_values[i]) = w[i+1];
1229template <
int Stride,
typename T,
typename... Vals>
1231cs_parall_max_strided
1237#if defined(HAVE_MPI)
1243 constexpr size_t n_vals =
sizeof...(Vals);
1253 T *_values[] = {values ...};
1255 constexpr size_t work_size = (n_vals + 1) * Stride;
1258 for (
int i = 0; i < Stride; i++)
1261 for (
size_t i = 0; i < n_vals; i++)
1262 for (
int j = 0; j < Stride; j++)
1263 w[(i+1)*Stride + j] = _values[i][j];
1267 for (
int i = 0; i < Stride; i++)
1270 for (
size_t i = 0; i < n_vals; i++)
1271 for (
int j = 0; j < Stride; j++)
1272 _values[i][j] = w[(i+1)*Stride + j];
1287template <
int Stride,
typename T,
typename... Vals>
1289cs_parall_max_strided
1296#if defined(HAVE_MPI)
1299 constexpr size_t n_vals =
sizeof...(Vals);
1309 T *_values[] = {values ...};
1311 constexpr size_t work_size = (n_vals + 1) * Stride;
1314 for (
int i = 0; i < Stride; i++)
1317 for (
size_t i = 0; i < n_vals; i++)
1318 for (
int j = 0; j < Stride; j++)
1319 w[(i+1)*Stride + j] = _values[i][j];
1323 for (
int i = 0; i < Stride; i++)
1326 for (
size_t i = 0; i < n_vals; i++)
1327 for (
int j = 0; j < Stride; j++)
1328 _values[i][j] = w[(i+1)*Stride + j];
1343template <
typename T,
typename... Vals>
1345cs_parall_min_scalars
1351#if defined(HAVE_MPI)
1357 constexpr size_t n_vals =
sizeof...(Vals);
1369 T *_values[] = {&values ...};
1373 for (
size_t i = 0; i < n_vals; i++)
1374 w[i + 1] = *(_values[i]);
1379 for (
size_t i = 0; i < n_vals; i++)
1380 *(_values[i]) = w[i + 1];
1394template <
typename T,
typename... Vals>
1396cs_parall_min_scalars
1403#if defined(HAVE_MPI)
1406 constexpr size_t n_vals =
sizeof...(Vals);
1418 T *_values[] = {&values ...};
1422 for (
size_t i = 0; i < n_vals; i++)
1423 w[i + 1] = *(_values[i]);
1428 for (
size_t i = 0; i < n_vals; i++)
1429 *(_values[i]) = w[i + 1];
1445template <
int Stride,
typename T,
typename... Vals>
1447cs_parall_min_strided
1453#if defined(HAVE_MPI)
1459 constexpr size_t n_vals =
sizeof...(Vals);
1469 T *_values[] = {values ...};
1471 constexpr size_t work_size = (n_vals + 1) * Stride;
1474 for (
int i = 0; i < Stride; i++)
1477 for (
size_t i = 0; i < n_vals; i++)
1478 for (
int j = 0; j < Stride; j++)
1479 w[(i+1)*Stride + j] = _values[i][j];
1483 for (
int i = 0; i < Stride; i++)
1486 for (
size_t i = 0; i < n_vals; i++)
1487 for (
int j = 0; j < Stride; j++)
1488 _values[i][j] = w[(i+1)*Stride + j];
1503#if defined(HAVE_MPI)
1505template <
int Stride,
typename T,
typename... Vals>
1507cs_parall_min_strided
1515 constexpr size_t n_vals =
sizeof...(Vals);
1526 T *_values[] = {values ...};
1528 constexpr size_t work_size = (n_vals + 1) * Stride;
1531 for (
int i = 0; i < Stride; i++)
1534 for (
size_t i = 0; i < n_vals; i++)
1535 for (
int j = 0; j < Stride; j++)
1536 w[(i+1)*Stride + j] = _values[i][j];
1540 for (
int i = 0; i < Stride; i++)
1543 for (
size_t i = 0; i < n_vals; i++)
1544 for (
int j = 0; j < Stride; j++)
1545 _values[i][j] = w[(i+1)*Stride + j];
Definition: cs_execution_context.h:61
bool use_mpi() const
Does the execution context use MPI parallelism?
Definition: cs_execution_context.h:128
MPI_Comm comm() const
Getter function for MPI communicator.
Definition: cs_execution_context.h:227
int cs_glob_n_ranks
Definition: cs_defs.cpp:175
MPI_Datatype cs_datatype_to_mpi[]
Definition: cs_defs.cpp:157
MPI_Comm cs_glob_mpi_comm
Definition: cs_defs.cpp:183
cs_datatype_t
Definition: cs_defs.h:300
#define BEGIN_C_DECLS
Definition: cs_defs.h:542
double cs_real_t
Floating-point value.
Definition: cs_defs.h:342
#define CS_MPI_LNUM
Definition: cs_defs.h:438
#define CS_MPI_GNUM
Definition: cs_defs.h:418
uint64_t cs_gnum_t
global mesh entity number
Definition: cs_defs.h:325
static cs_lnum_t cs_align(cs_lnum_t i, cs_lnum_t m)
Given a base index i, return the next index aligned with a size m.
Definition: cs_defs.h:652
#define CS_UNUSED(x)
Definition: cs_defs.h:528
#define END_C_DECLS
Definition: cs_defs.h:543
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:335
#define CS_CL_SIZE
Definition: cs_defs.h:498
void cs_parall_gather_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[])
Build a global array on the given root rank from all local arrays.
Definition: cs_parall.cpp:1030
static void cs_parall_bcast(int root_rank, int n, cs_datatype_t datatype, void *val)
Broadcast values of a given datatype to all default communicator processes.
Definition: cs_parall.h:248
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition: cs_parall.cpp:1353
void cs_parall_gather_ordered_r(int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array on the given root rank from all local arrays.
Definition: cs_parall.cpp:1097
void cs_parall_min_id_rank_r(cs_lnum_t *elt_id, int *rank_id, cs_real_t val)
Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum va...
Definition: cs_parall.cpp:855
static void cs_parall_thread_range_upper(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread for upper triangular matrix elements.
Definition: cs_parall.h:634
static void cs_parall_max(int n, cs_datatype_t datatype, void *val)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:180
static void cs_parall_counter_max(cs_lnum_t cpt[], const int n)
Maximum values of a counter on all default communicator processes.
Definition: cs_parall.h:117
void cs_parall_allgather_r(int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[])
Build a global array from each local array in each domain.
Definition: cs_parall.cpp:909
static int cs_parall_n_threads(cs_lnum_t n_elements, cs_lnum_t min_thread_elements)
Compute recommended number of threads for a section.
Definition: cs_parall.h:555
static void cs_parall_counter(cs_gnum_t cpt[], const int n)
Sum values of a counter on all default communicator processes.
Definition: cs_parall.h:88
void cs_parall_scatter_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[])
Distribute a global array from a given root rank over all ranks. Each rank receives the part related t...
Definition: cs_parall.cpp:1146
static void cs_parall_sum(int n, cs_datatype_t datatype, void *val)
Sum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:147
void cs_parall_allgather_ordered_r(int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array from each local array in each domain.
Definition: cs_parall.cpp:986
void cs_parall_scatter_f(int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[])
Distribute a global array from a given root rank over all ranks. Each rank receives the part related t...
Definition: cs_parall.cpp:1276
static void cs_parall_thread_range(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread. When called inside an OpenMP parallel section,...
Definition: cs_parall.h:589
void cs_parall_min_loc_vals(int n, cs_real_t *min, cs_real_t min_loc_vals[])
Minimum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.cpp:816
static void cs_parall_min(int n, cs_datatype_t datatype, void *val)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:213
void cs_parall_gather_f(int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[])
Build a global array on the given root rank from all local arrays. Function dealing with single-preci...
Definition: cs_parall.cpp:1211
void cs_parall_max_loc_vals(int n, cs_real_t *max, cs_real_t max_loc_vals[])
Maximum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.cpp:778
size_t cs_parall_get_min_coll_buf_size(void)
Return minimum recommended scatter or gather buffer size.
Definition: cs_parall.cpp:1331
static size_t cs_parall_block_count(size_t n, size_t block_size)
Compute number of blocks needed for a given array and block sizes.
Definition: cs_parall.h:677
cs_e2n_sum_t
Definition: cs_parall.h:52
@ CS_E2N_SUM_SCATTER_ATOMIC
Definition: cs_parall.h:57
@ CS_E2N_SUM_SCATTER
Definition: cs_parall.h:54
@ CS_E2N_SUM_GATHER
Definition: cs_parall.h:59
cs_e2n_sum_t cs_glob_e2n_sum_type