34 #ifdef OOMPH_TRANSITION_TO_VERSION_3
54 const double& max_error,
55 const double& min_error,
77 const unsigned& ndomain,
84 if (nelem != element_domain.size())
86 std::ostringstream error_stream;
87 error_stream <<
"element_domain Vector has wrong length " << nelem <<
" "
88 << element_domain.size() << std::endl;
91 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
97 unsigned nel_per_domain = int(
float(nelem) /
float(ndomain));
98 for (
unsigned ielem = 0; ielem < nelem; ielem++)
100 unsigned idomain = unsigned(
float(ielem) /
float(nel_per_domain));
101 element_domain[ielem] = idomain;
117 const unsigned& ndomain,
118 const unsigned& objective,
125 unsigned nelem = mesh_pt->
nelement();
128 if (nelem != element_domain.size())
130 std::ostringstream error_stream;
131 error_stream <<
"element_domain Vector has wrong length " << nelem <<
" "
132 << element_domain.size() << std::endl;
135 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
143 clock_t cpu_start = clock();
146 std::map<unsigned, std::set<unsigned>> elements_connected_with_global_eqn;
149 std::set<unsigned> all_global_eqns;
152 for (
unsigned e = 0;
e < nelem;
e++)
157 unsigned ndof = el_pt->
ndof();
158 for (
unsigned j = 0; j < ndof; j++)
162 elements_connected_with_global_eqn[eqn_number].insert(
e);
163 all_global_eqns.insert(eqn_number);
175 for (std::set<unsigned>::iterator it = all_global_eqns.begin();
176 it != all_global_eqns.end();
180 std::set<unsigned> elements = elements_connected_with_global_eqn[*it];
184 for (std::set<unsigned>::iterator it1 = elements.begin();
185 it1 != elements.end();
188 for (std::set<unsigned>::iterator it2 = elements.begin();
189 it2 != elements.end();
192 if ((*it1) != (*it2))
194 connected_elements[(*it1)].insert(*it2);
202 int* xadj =
new int[nelem + 1];
206 adjacency_vector.reserve(count);
212 for (
unsigned e = 0;
e < nelem;
e++)
218 typedef std::set<unsigned>::iterator IT;
219 for (IT it = connected_elements[
e].begin();
220 it != connected_elements[
e].end();
224 adjacency_vector.push_back(*it);
231 xadj[
e + 1] = ientry;
235 clock_t cpu_end = clock();
238 double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
240 <<
"CPU time for setup of METIS data structures [nelem="
241 << nelem <<
"]: " << cpu0 <<
" sec" << std::endl;
249 if (adjacency_vector.size() == 0)
253 <<
"Note: All elements in the Problem's Mesh appear to be\n"
254 <<
"unconnected. This happens, e.g. if all elements only have\n"
255 <<
"internal Data. Bypassing metis and distributing elements\n"
256 <<
"in round-robin fashion amongst the " << n_proc <<
" processors."
258 for (
unsigned e = 0;
e < nelem;
e++)
260 element_domain[
e] =
e % n_proc;
289 int nparts = ndomain;
292 int* options =
new int[10];
295 #ifdef OOMPH_TRANSITION_TO_VERSION_3
300 options[0] = METIS_OBJTYPE_CUT;
305 options[0] = METIS_OBJTYPE_VOL;
309 std::ostringstream error_stream;
310 error_stream <<
"Wrong objective for METIS. objective = " << objective
314 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
319 int* edgecut =
new int[nelem];
322 int* part =
new int[nelem];
326 unsigned n_mesh = problem_pt->
nsub_mesh();
337 <<
"Biasing element distribution via spatial error estimate\n";
341 vwgt =
new int[nelem];
346 mesh_pt, elemental_error);
349 *(std::max_element(elemental_error.begin(), elemental_error.end()));
351 *(std::min_element(elemental_error.begin(), elemental_error.end()));
355 for (
unsigned e = 0;
e < nelem;
e++)
359 elemental_error[
e], max_error, min_error, weight);
368 bool refineable_submesh_exists =
false;
374 for (
unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
377 loop_helper[i_mesh + 1] =
384 refineable_submesh_exists =
true;
389 if (refineable_submesh_exists)
395 <<
"Biasing element distribution via spatial error estimate\n";
399 vwgt =
new int[nelem];
402 for (
unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
410 loop_helper[i_mesh + 1] - loop_helper[i_mesh];
413 problem_pt->
mesh_pt(i_mesh), elemental_error);
415 double max_error = *(std::max_element(elemental_error.begin(),
416 elemental_error.end()));
417 double min_error = *(std::min_element(elemental_error.begin(),
418 elemental_error.end()));
422 unsigned start = loop_helper[i_mesh];
423 unsigned end = loop_helper[i_mesh + 1];
424 for (
unsigned e =
start;
e < end;
e++)
426 unsigned error_index =
e -
start;
429 elemental_error[error_index], max_error, min_error, weight);
437 unsigned start = loop_helper[i_mesh];
438 unsigned end = loop_helper[i_mesh + 1];
439 for (
unsigned e =
start;
e < end;
e++)
449 #ifdef OOMPH_TRANSITION_TO_VERSION_3
454 &adjacency_vector[0],
472 &adjacency_vector[0],
482 else if (objective == 1)
488 &adjacency_vector[0],
500 std::ostringstream error_stream;
501 error_stream <<
"Wrong objective for METIS. objective = " << objective
505 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
510 std::vector<bool> done(nparts,
false);
514 for (
unsigned e = 0;
e < nelem;
e++)
516 element_domain[
e] = part[
e];
518 done[part[
e]] =
true;
525 std::ostringstream error_stream;
527 for (
int p = 0; p < nparts; p++)
532 error_stream <<
"No elements on processor " << p
533 <<
"when trying to partition " << nelem <<
"elements over "
534 << nparts <<
" processors!\n";
540 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
549 double cpu1 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
551 <<
"CPU time for METIS mesh partitioning [nelem="
552 << nelem <<
"]: " << cpu1 <<
" sec" << std::endl;
591 const unsigned& objective,
593 const bool& bypass_metis)
596 clock_t cpu_start = clock();
602 unsigned n_proc = comm_pt->nproc();
603 unsigned my_rank = comm_pt->my_rank();
609 unsigned n_elem = mesh_pt->
nelement();
616 unsigned n = elemental_assembly_time.size();
617 if ((n != 0) && (n != n_elem))
619 std::ostringstream error_stream;
620 error_stream <<
"Number of elements doesn't match the \n"
621 <<
"number of elemental assembly times: " << n_elem <<
" "
624 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
629 bool can_load_balance_on_assembly_times =
false;
630 if (elemental_assembly_time.size() != 0)
632 can_load_balance_on_assembly_times =
true;
636 std::set<unsigned> global_eqns_on_this_proc;
640 std::map<unsigned, std::set<GeneralisedElement*>>
641 root_elements_connected_with_global_eqn_on_this_proc;
649 eqn_numbers_with_root_elements_on_this_proc.reserve(n_elem * 9);
657 number_of_dofs_for_root_element.reserve(n_elem);
661 number_of_non_halo_elements_for_root_element.reserve(n_elem);
665 total_assembly_time_for_root_element.reserve(n_elem);
669 std::map<GeneralisedElement*, unsigned> root_el_number_plus_one;
672 int number_of_root_elements = 0;
673 unsigned number_of_non_halo_elements = 0;
674 for (
unsigned e = 0;
e < n_elem;
e++)
676 double el_assembly_time = 0.0;
680 if (can_load_balance_on_assembly_times)
682 el_assembly_time = elemental_assembly_time[
e];
701 bool already_encountered =
false;
702 unsigned root_el_number = root_el_number_plus_one[root_el_pt];
703 if (root_el_number_plus_one[root_el_pt] == 0)
706 already_encountered =
false;
709 number_of_root_elements++;
710 root_el_number_plus_one[root_el_pt] = number_of_root_elements;
713 root_el_number = number_of_root_elements - 1;
718 already_encountered =
true;
726 unsigned n_dof = el_pt->
ndof();
727 for (
unsigned i = 0;
i < n_dof;
i++)
732 root_elements_connected_with_global_eqn_on_this_proc[eqn_no].insert(
736 global_eqns_on_this_proc.insert(eqn_no);
740 eqn_numbers_with_root_elements_on_this_proc.push_back(eqn_no);
745 if (already_encountered)
747 number_of_dofs_for_root_element[root_el_number] += n_dof;
748 number_of_non_halo_elements_for_root_element[root_el_number]++;
749 total_assembly_time_for_root_element[root_el_number] +=
754 number_of_dofs_for_root_element.push_back(n_dof);
755 number_of_non_halo_elements_for_root_element.push_back(1);
756 total_assembly_time_for_root_element.push_back(el_assembly_time);
760 number_of_non_halo_elements++;
766 unsigned root_processor = 0;
767 Vector<int> number_of_root_elements_on_each_proc(n_proc, 0);
768 MPI_Allgather(&number_of_root_elements,
771 &number_of_root_elements_on_each_proc[0],
774 comm_pt->mpi_comm());
782 unsigned total_number_of_root_elements = 0;
783 for (
unsigned i_proc = 0; i_proc < n_proc; i_proc++)
785 total_number_of_root_elements +=
786 number_of_root_elements_on_each_proc[i_proc];
789 start_index[i_proc] = total_number_of_root_elements -
790 number_of_root_elements_on_each_proc[i_proc];
800 int n_eqns_on_this_proc =
801 eqn_numbers_with_root_elements_on_this_proc.size();
807 MPI_Allgather(&n_eqns_on_this_proc,
810 &n_eqns_on_each_proc[0],
813 comm_pt->mpi_comm());
822 unsigned total_n_eqn = 0;
823 for (
unsigned i_proc = 0; i_proc < n_proc; i_proc++)
825 total_n_eqn += n_eqns_on_each_proc[i_proc];
828 start_eqns_index[i_proc] = total_n_eqn - n_eqns_on_each_proc[i_proc];
832 start_eqns_index[0] = 0;
840 total_number_of_root_elements);
842 if (number_of_dofs_for_root_element.size() == 0)
844 number_of_dofs_for_root_element.resize(1);
847 &number_of_dofs_for_root_element[0],
849 number_of_root_elements,
852 &number_of_dofs_for_global_root_element[0],
856 &number_of_root_elements_on_each_proc[0],
866 comm_pt->mpi_comm());
871 total_number_of_root_elements);
874 if (number_of_non_halo_elements_for_root_element.size() == 0)
876 number_of_non_halo_elements_for_root_element.resize(1);
878 MPI_Gatherv(&number_of_non_halo_elements_for_root_element[0],
881 number_of_root_elements,
884 &number_of_non_halo_elements_for_global_root_element[0],
889 &number_of_root_elements_on_each_proc[0],
899 comm_pt->mpi_comm());
904 total_number_of_root_elements);
907 if (total_assembly_time_for_root_element.size() == 0)
909 total_assembly_time_for_root_element.resize(1);
911 MPI_Gatherv(&total_assembly_time_for_root_element[0],
914 number_of_root_elements,
917 &total_assembly_time_for_global_root_element[0],
922 &number_of_root_elements_on_each_proc[0],
932 comm_pt->mpi_comm());
940 if (eqn_numbers_with_root_elements_on_this_proc.size() == 0)
942 eqn_numbers_with_root_elements_on_this_proc.resize(1);
944 MPI_Gatherv(&eqn_numbers_with_root_elements_on_this_proc[0],
947 &eqn_numbers_with_root_elements[0],
948 &n_eqns_on_each_proc[0],
949 &start_eqns_index[0],
952 comm_pt->mpi_comm());
955 clock_t cpu_end = clock();
957 double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
959 <<
"CPU time for global setup of METIS data structures [nroot_elem="
960 << total_number_of_root_elements <<
"]: " << cpu0 <<
" sec" << std::endl;
971 if (my_rank == root_processor)
974 clock_t cpu_start = clock();
978 std::set<unsigned> all_global_eqns_root_processor;
982 std::map<unsigned, std::set<unsigned>>
983 root_elements_connected_with_global_eqn_on_root_processor;
986 unsigned count_all = 0;
987 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
989 unsigned n_eqn_no = number_of_dofs_for_global_root_element[
e];
990 for (
unsigned n = 0; n < n_eqn_no; n++)
992 unsigned eqn_no = eqn_numbers_with_root_elements[count_all];
994 root_elements_connected_with_global_eqn_on_root_processor[eqn_no]
996 all_global_eqns_root_processor.insert(eqn_no);
1001 unsigned ndomain = n_proc;
1006 total_number_of_root_elements);
1013 for (std::set<unsigned>::iterator it =
1014 all_global_eqns_root_processor.begin();
1015 it != all_global_eqns_root_processor.end();
1019 std::set<unsigned> root_elements =
1020 root_elements_connected_with_global_eqn_on_root_processor[*it];
1024 for (std::set<unsigned>::iterator it1 = root_elements.begin();
1025 it1 != root_elements.end();
1028 for (std::set<unsigned>::iterator it2 = root_elements.begin();
1029 it2 != root_elements.end();
1032 if ((*it1) != (*it2))
1034 connected_root_elements[(*it1)].insert(*it2);
1041 clock_t cpu_end = clock();
1044 double cpu0b = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
1045 oomph_info <<
"CPU time for setup of connected elements (load balance) "
1047 << total_number_of_root_elements <<
"]: " << cpu0b <<
" sec"
1051 cpu_start = clock();
1052 int* xadj =
new int[total_number_of_root_elements + 1];
1056 adjacency_vector.reserve(count);
1059 unsigned ientry = 0;
1062 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1068 typedef std::set<unsigned>::iterator IT;
1069 for (IT it = connected_root_elements[
e].begin();
1070 it != connected_root_elements[
e].end();
1074 adjacency_vector.push_back(*it);
1081 xadj[
e + 1] = ientry;
1088 double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
1089 oomph_info <<
"CPU time for setup of METIS data structures (load "
1090 "balance) [nroot_elem="
1091 << total_number_of_root_elements <<
"]: " << cpu0 <<
" sec"
1099 cpu_start = clock();
1102 int nvertex = total_number_of_root_elements;
1118 int nparts = ndomain;
1121 int* options =
new int[10];
1124 #ifdef OOMPH_TRANSITION_TO_VERSION_3
1129 options[0] = METIS_OBJTYPE_CUT;
1134 options[0] = METIS_OBJTYPE_VOL;
1138 std::ostringstream error_stream;
1139 error_stream <<
"Wrong objective for METIS. objective = " << objective
1143 OOMPH_CURRENT_FUNCTION,
1144 OOMPH_EXCEPTION_LOCATION);
1149 int* edgecut =
new int[total_number_of_root_elements];
1152 int* part =
new int[total_number_of_root_elements];
1159 vwgt =
new int[total_number_of_root_elements];
1164 if (can_load_balance_on_assembly_times)
1166 oomph_info <<
"Basing distribution on assembly times of elements\n";
1169 double min_time = *(
1170 std::min_element(total_assembly_time_for_global_root_element.begin(),
1171 total_assembly_time_for_global_root_element.end()));
1173 if (min_time == 0.0)
1175 std::ostringstream error_stream;
1176 error_stream <<
"Minimum assemble time for element is zero!\n";
1178 OOMPH_CURRENT_FUNCTION,
1179 OOMPH_EXCEPTION_LOCATION);
1187 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1194 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1198 int(total_assembly_time_for_global_root_element[
e] / min_time);
1206 oomph_info <<
"Basing distribution on number of elements\n";
1207 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1209 vwgt[
e] = number_of_non_halo_elements_for_global_root_element[
e];
1217 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1221 part[
e] = (n_proc - 1) -
1222 unsigned(
double(
e) / double(total_number_of_root_elements) *
1227 <<
"Bypassing METIS for validation purposes.\n"
1228 <<
"Appending input for metis in metis_input_for_validation.dat\n";
1229 std::ofstream outfile;
1230 outfile.open(
"metis_input_for_validation.dat", std::ios_base::app);
1233 for (
unsigned e = 0;
e < total_number_of_root_elements + 1;
e++)
1235 outfile << xadj[
e] << std::endl;
1237 unsigned n = adjacency_vector.size();
1238 for (
unsigned i = 0;
i < n;
i++)
1240 outfile << adjacency_vector[
i] << std::endl;
1242 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1244 outfile << vwgt[
e] << std::endl;
1251 #ifdef OOMPH_TRANSITION_TO_VERSION_3
1255 &adjacency_vector[0],
1273 &adjacency_vector[0],
1283 else if (objective == 1)
1289 &adjacency_vector[0],
1304 for (
unsigned e = 0;
e < total_number_of_root_elements;
e++)
1306 root_element_domain[
e] = part[
e];
1307 total_weight_on_proc[part[
e]] += vwgt[
e];
1311 for (
unsigned j = 0; j < n_proc; j++)
1313 oomph_info <<
"Total weight on proc " << j <<
" is "
1314 << total_weight_on_proc[j] << std::endl;
1318 double cpu1 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
1319 oomph_info <<
"CPU time for METIS mesh partitioning [nroot_elem="
1320 << total_number_of_root_elements <<
"]: " << cpu1 <<
" sec"
1337 cpu_start = clock();
1338 Vector<unsigned> root_element_domain_on_this_proc(number_of_root_elements);
1341 if (root_element_domain_on_this_proc.size() == 0)
1343 root_element_domain_on_this_proc.resize(1);
1345 MPI_Scatterv(&root_element_domain[0],
1346 &number_of_root_elements_on_each_proc[0],
1349 &root_element_domain_on_this_proc[0],
1350 number_of_root_elements,
1353 comm_pt->mpi_comm());
1358 element_domain_on_this_proc.resize(number_of_non_halo_elements);
1359 unsigned count_non_halo = 0;
1360 for (
unsigned e = 0;
e < n_elem;
e++)
1380 unsigned root_el_number = root_el_number_plus_one[root_el_pt] - 1;
1383 element_domain_on_this_proc[count_non_halo] =
1384 root_element_domain_on_this_proc[root_el_number];
1393 if (count_non_halo != number_of_non_halo_elements)
1395 std::ostringstream error_stream;
1396 error_stream <<
"Non-halo counts don't match: " << count_non_halo <<
" "
1397 << number_of_non_halo_elements << std::endl;
1400 error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
1408 double cpu2 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
1409 oomph_info <<
"CPU time for communication of partition to all processors "
1411 << total_number_of_root_elements <<
"]: " << cpu2 <<
" sec"
void get_element_errors(Mesh *&mesh_pt, Vector< double > &elemental_error)
Compute the elemental error-measures for a given mesh and store them in a vector.
A Generalised Element class.
bool is_halo() const
Is this element a halo?
unsigned ndof() const
Return the number of equations/dofs in the element.
unsigned long eqn_number(const unsigned &ieqn_local) const
Return the global equation number corresponding to the ieqn_local-th local equation number.
GeneralisedElement *& element_pt(const unsigned long &e)
Return pointer to element e.
unsigned long nelement() const
Return number of elements in the mesh.
An oomph-lib wrapper to the MPI_Comm communicator object. Just contains an MPI_Comm object (which is ...
An OomphLibError object which should be thrown when a run-time error is encountered....
////////////////////////////////////////////////////////////////// //////////////////////////////////...
OomphCommunicator * communicator_pt()
access function to the oomph-lib communicator
Vector< double > elemental_assembly_time()
Return vector of most-recent elemental assembly times (used for load balancing). Zero sized if no Jac...
Mesh *& mesh_pt()
Return a pointer to the global mesh.
unsigned nsub_mesh() const
Return number of submeshes.
RefineableElements are FiniteElements that may be subdivided into children to provide a better local ...
virtual RefineableElement * root_element_pt()
Pointer to the root element in refinement hierarchy (must be implemented in specific elements that do...
Base class for refineable meshes. Provides standardised interfaces for the following standard mesh ad...
ErrorEstimator *& spatial_error_estimator_pt()
Access to spatial error estimator.
void start(const unsigned &i)
(Re-)start i-th timer
void uniform_partition_mesh(Problem *problem_pt, const unsigned &ndomain, Vector< unsigned > &element_domain)
Partition mesh uniformly by dividing elements equally over the partitions, in the order in which they...
void partition_distributed_mesh(Problem *problem_pt, const unsigned &objective, Vector< unsigned > &element_domain_on_this_proc, const bool &bypass_metis=false)
Use METIS to assign each element in an already-distributed mesh to a domain. On return,...
void partition_mesh(Problem *problem_pt, const unsigned &ndomain, const unsigned &objective, Vector< unsigned > &element_domain)
Use METIS to assign each element to a domain. On return, element_domain[ielem] contains the number of...
void(* ErrorToWeightFctPt)(const double &spatial_error, const double &max_error, const double &min_error, int &weight)
Typedef for function pointer to a function that translates spatial error into weight for METIS parti...
ErrorToWeightFctPt Error_to_weight_fct_pt
Function pointer to a function that translates spatial error into weight for METIS partitioning.
void default_error_to_weight_fct(const double &spatial_error, const double &max_error, const double &min_error, int &weight)
Default function that translates spatial error into weight for METIS partitioning (unit weight regard...
//////////////////////////////////////////////////////////////////// ////////////////////////////////...
void METIS_PartGraphKway(int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *)
Metis graph partitioning function.
void METIS_PartGraphVKway(int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *)
Metis graph partitioning function – decomposes nodal graph based on minimum communication volume.
OomphInfo oomph_info
Single (global) instantiation of the OomphInfo object – this is used throughout the library as a "rep...