problem.cc
1 // LIC// ====================================================================
2 // LIC// This file forms part of oomph-lib, the object-oriented,
3 // LIC// multi-physics finite-element library, available
4 // LIC// at http://www.oomph-lib.org.
5 // LIC//
6 // LIC// Copyright (C) 2006-2023 Matthias Heil and Andrew Hazel
7 // LIC//
8 // LIC// This library is free software; you can redistribute it and/or
9 // LIC// modify it under the terms of the GNU Lesser General Public
10 // LIC// License as published by the Free Software Foundation; either
11 // LIC// version 2.1 of the License, or (at your option) any later version.
12 // LIC//
13 // LIC// This library is distributed in the hope that it will be useful,
14 // LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // LIC// Lesser General Public License for more details.
17 // LIC//
18 // LIC// You should have received a copy of the GNU Lesser General Public
19 // LIC// License along with this library; if not, write to the Free Software
20 // LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 // LIC// 02110-1301 USA.
22 // LIC//
23 // LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
24 // LIC//
25 // LIC//====================================================================
26 
27 #ifdef OOMPH_HAS_MPI
28 #include "mpi.h"
29 #endif
30 
31 #include <list>
32 #include <algorithm>
33 #include <string>
34 
35 #include "oomph_utilities.h"
36 #include "problem.h"
37 #include "timesteppers.h"
38 #include "explicit_timesteppers.h"
40 #include "refineable_mesh.h"
41 #include "triangle_mesh.h"
42 #include "linear_solver.h"
43 #include "eigen_solver.h"
44 #include "assembly_handler.h"
45 #include "dg_elements.h"
46 #include "partitioning.h"
47 #include "spines.h"
48 
49 // Include to fill in additional_setup_shared_node_scheme() function
51 
52 
53 namespace oomph
54 {
55  /// ///////////////////////////////////////////////////////////////
56  // Non-inline functions for the problem class
57  /// ///////////////////////////////////////////////////////////////
58 
59  //=================================================================
60  /// The continuation timestepper object
61  //=================================================================
62  ContinuationStorageScheme Problem::Continuation_time_stepper;
63 
64  //================================================================
65  /// Constructor: Allocate space for one time stepper
66  /// and set all pointers to NULL and set defaults for all
67  /// parameters.
68  //===============================================================
69  Problem::Problem()
70  : Mesh_pt(0),
71  Time_pt(0),
72  Explicit_time_stepper_pt(0),
73  Saved_dof_pt(0),
74  Default_set_initial_condition_called(false),
75  Use_globally_convergent_newton_method(false),
76  Empty_actions_before_read_unstructured_meshes_has_been_called(false),
77  Empty_actions_after_read_unstructured_meshes_has_been_called(false),
78  Store_local_dof_pt_in_elements(false),
79  Calculate_hessian_products_analytic(false),
80 #ifdef OOMPH_HAS_MPI
81  Doc_imbalance_in_parallel_assembly(false),
82  Use_default_partition_in_load_balance(false),
83  Must_recompute_load_balance_for_assembly(true),
84  Halo_scheme_pt(0),
85 #endif
86  Relaxation_factor(1.0),
87  Newton_solver_tolerance(1.0e-8),
89  Nnewton_iter_taken(0),
90  Max_residuals(10.0),
91  Time_adaptive_newton_crash_on_solve_fail(false),
92  Jacobian_reuse_is_enabled(false),
93  Jacobian_has_been_computed(false),
94  Problem_is_nonlinear(true),
95  Pause_at_end_of_sparse_assembly(false),
96  Doc_time_in_distribute(false),
97  Sparse_assembly_method(Perform_assembly_using_vectors_of_pairs),
98  Sparse_assemble_with_arrays_initial_allocation(400),
99  Sparse_assemble_with_arrays_allocation_increment(150),
100  Numerical_zero_for_sparse_assembly(0.0),
101  FD_step_used_in_get_hessian_vector_products(1.0e-8),
102  Mass_matrix_reuse_is_enabled(false),
103  Mass_matrix_has_been_computed(false),
104  Discontinuous_element_formulation(false),
105  Minimum_dt(1.0e-12),
106  Maximum_dt(1.0e12),
107  DTSF_max_increase(4.0),
108  DTSF_min_decrease(0.8),
109  Minimum_dt_but_still_proceed(-1.0),
110  Scale_arc_length(true),
111  Desired_proportion_of_arc_length(0.5),
112  Theta_squared(1.0),
113  Sign_of_jacobian(0),
114  Continuation_direction(1.0),
115  Parameter_derivative(1.0),
116  Parameter_current(0.0),
117  Use_continuation_timestepper(false),
118  Dof_derivative_offset(1),
119  Dof_current_offset(2),
120  Ds_current(0.0),
121  Desired_newton_iterations_ds(5),
122  Minimum_ds(1.0e-10),
123  Bifurcation_detection(false),
124  Bisect_to_find_bifurcation(false),
125  First_jacobian_sign_change(false),
126  Arc_length_step_taken(false),
127  Use_finite_differences_for_continuation_derivatives(false),
128 #ifdef OOMPH_HAS_MPI
129  Dist_problem_matrix_distribution(Uniform_matrix_distribution),
130  Parallel_sparse_assemble_previous_allocation(0),
131  Problem_has_been_distributed(false),
132  Bypass_increase_in_dof_check_during_pruning(false),
133  Max_permitted_error_for_halo_check(1.0e-14),
134 #endif
135  Shut_up_in_newton_solve(false),
136  Always_take_one_newton_step(false),
137  Timestep_reduction_factor_after_nonconvergence(0.5),
138  Keep_temporal_error_below_tolerance(true)
139  {
141 
142  /// Setup terminate helper
144 
145  // By default no submeshes:
146  Sub_mesh_pt.resize(0);
147  // No timesteppers
148  Time_stepper_pt.resize(0);
149 
150  // Set the linear solvers, eigensolver and assembly handler
153 
155 
157 
158  // setup the communicator
159 #ifdef OOMPH_HAS_MPI
161  {
163  }
164  else
165  {
167  }
168 #else
170 #endif
171 
172  // just create an empty linear algebra distribution for the
173  // DOFs
174  // this is set up when assign_eqn_numbers(...) is called.
176  }
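  // Illustrative sketch (hypothetical user code, not part of this file):
  // the defaults set above are usually adjusted from the constructor of a
  // user-defined Problem subclass, assuming the relevant members are
  // protected. "MyProblem" is a hypothetical class name.
  //
  //   class MyProblem : public Problem
  //   {
  //   public:
  //    MyProblem()
  //     {
  //      // Tighten the Newton convergence tolerance and allow larger
  //      // initial residuals before the Newton solver gives up
  //      Newton_solver_tolerance = 1.0e-10;
  //      Max_residuals = 100.0;
  //     }
  //   };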
177 
178  //================================================================
179  /// Destructor to clean up memory
180  //================================================================
182  {
183  // Delete the memory assigned for the global time
184  // (it's created on the fly in Problem::add_time_stepper_pt()
185  // so we are entitled to delete it.)
186  if (Time_pt != 0)
187  {
188  delete Time_pt;
189  Time_pt = 0;
190  }
191 
192  // If we're not using the default linear solver,
193  // somebody else must have built it, so that person
194  // must be in charge of killing it.
195  // We can safely delete the defaults, however
197 
200  delete Communicator_pt;
201  delete Dof_distribution_pt;
202 
203  // Delete any copies of the problem that have been created for
204  // use in adaptive bifurcation tracking.
205  // ALH: This will eventually go
206  unsigned n_copies = Copy_of_problem_pt.size();
207  for (unsigned c = 0; c < n_copies; c++)
208  {
209  delete Copy_of_problem_pt[c];
210  }
211 
212  // if this problem has sub meshes then we must delete the Mesh_pt
213  if (Sub_mesh_pt.size() != 0)
214  {
216  delete Mesh_pt;
217  }
218 
219  // Since we called the TerminateHelper setup function in the constructor,
220  // we need to delete anything that was dynamically allocated (as it's
221  // just a namespace and so doesn't have its own destructor) in the function
223  }
224 
225  //=================================================================
226  /// Setup the count vector that records how many elements contribute
227  /// to each degree of freedom. Returns the total number of elements
228  /// in the problem
229  //=================================================================
231  {
232  // Now set the element counter to have the current Dof distribution
235  // We need to use the halo scheme (assuming it has been set up)
235 #ifdef OOMPH_HAS_MPI
237 #endif
238 
239  // Loop over the elements and count the entries
240  // and number of (non-halo) elements
241  const unsigned n_element = this->mesh_pt()->nelement();
242  unsigned n_non_halo_element_local = 0;
243  for (unsigned e = 0; e < n_element; e++)
244  {
245  GeneralisedElement* elem_pt = this->mesh_pt()->element_pt(e);
246 #ifdef OOMPH_HAS_MPI
247  // Ignore halo elements
248  if (!elem_pt->is_halo())
249  {
250 #endif
251  // Increment the number of non halo elements
252  ++n_non_halo_element_local;
253  // Now count the number of times the element contributes to a value
254  // using the current assembly handler
255  unsigned n_var = this->Assembly_handler_pt->ndof(elem_pt);
256  for (unsigned n = 0; n < n_var; n++)
257  {
259  this->Assembly_handler_pt->eqn_number(elem_pt, n));
260  }
261 #ifdef OOMPH_HAS_MPI
262  }
263 #endif
264  }
265 
266  // Storage for the total number of elements
267  unsigned Nelement = 0;
268 
269  // Add together all the counts if we are in parallel
270 #ifdef OOMPH_HAS_MPI
272 
273  // If distributed, find the total number of elements in the problem
275  {
276  // Need to gather the total number of non halo elements
277  MPI_Allreduce(&n_non_halo_element_local,
278  &Nelement,
279  1,
280  MPI_UNSIGNED,
281  MPI_SUM,
282  this->communicator_pt()->mpi_comm());
283  }
284  // Otherwise the total number is the same on each processor
285  else
286 #endif
287  {
288  Nelement = n_non_halo_element_local;
289  }
290 
291  return Nelement;
292  }
293 
294 
295  //==================================================================
296  /// Build new LinearAlgebraDistribution. Note: you're in charge of
297  /// deleting it!
298  //==================================================================
300  LinearAlgebraDistribution*& dist_pt)
301  {
302  // Find the number of rows
303  const unsigned nrow = this->ndof();
304 
305 #ifdef OOMPH_HAS_MPI
306 
307  unsigned nproc = Communicator_pt->nproc();
308 
309  // if the problem is on only one processor assemble a non-distributed
310  // distribution
311  if (nproc == 1)
312  {
313  dist_pt = new LinearAlgebraDistribution(Communicator_pt, nrow, false);
314  }
315  // if the problem is not distributed then assemble the jacobian with
316  // a uniform (distributed) distribution
318  {
319  dist_pt = new LinearAlgebraDistribution(Communicator_pt, nrow, true);
320  }
321  // otherwise the problem is a distributed problem
322  else
323  {
325  {
327 
328  dist_pt = new LinearAlgebraDistribution(Communicator_pt, nrow, true);
329  break;
330 
332 
334  break;
335 
337 
338  // Put in its own scope to avoid warnings about "local" variables
339  {
340  LinearAlgebraDistribution* uniform_dist_pt =
342  bool use_problem_dist = true;
343  for (unsigned p = 0; p < nproc; p++)
344  {
345  // hierher Andrew: what's the logic behind this?
346  if ((double)Dof_distribution_pt->nrow_local(p) >
347  ((double)uniform_dist_pt->nrow_local(p)) * 1.1)
348  {
349  use_problem_dist = false;
350  }
351  }
352  if (use_problem_dist)
353  {
355  }
356  else
357  {
358  dist_pt = new LinearAlgebraDistribution(uniform_dist_pt);
359  }
360  delete uniform_dist_pt;
361  }
362  break;
363 
364  default:
365 
366  std::ostringstream error_stream;
367  error_stream << "Never get here. Dist_problem_matrix_distribution = "
368  << Dist_problem_matrix_distribution << std::endl;
369  throw OomphLibError(error_stream.str(),
370  OOMPH_CURRENT_FUNCTION,
371  OOMPH_EXCEPTION_LOCATION);
372  break;
373  }
374  }
375 #else
376  dist_pt = new LinearAlgebraDistribution(Communicator_pt, nrow, false);
377 #endif
378  }
379 
380 
381 #ifdef OOMPH_HAS_MPI
382 
383  //==================================================================
384  /// Setup the halo scheme for the degrees of freedom
385  //==================================================================
387  {
388  // Find the number of elements stored on this processor
389  const unsigned n_element = this->mesh_pt()->nelement();
390 
391  // Work out all the global equations to which this processor
392  // contributes
393  Vector<unsigned> my_eqns;
394  this->get_my_eqns(this->Assembly_handler_pt, 0, n_element - 1, my_eqns);
395 
396  // Build the halo scheme, based on the equations to which this
397  // processor contributes
399  new DoubleVectorHaloScheme(this->Dof_distribution_pt, my_eqns);
400 
401  // Find pointers to all the halo dofs
402  // There may be more of these than required by my_eqns
403  // (but should not be fewer)
404  std::map<unsigned, double*> halo_data_pt;
405  this->get_all_halo_data(halo_data_pt);
406 
407  // Now setup the Halo_dofs
408  Halo_scheme_pt->setup_halo_dofs(halo_data_pt, this->Halo_dof_pt);
409  }
410 
411  //==================================================================
412  /// Distribute the problem without doc; report stats if required.
413  /// Returns actual partitioning used, e.g. for restart.
414  //==================================================================
415  Vector<unsigned> Problem::distribute(const bool& report_stats)
416  {
417  // Set dummy doc parameters
418  DocInfo doc_info;
419  doc_info.disable_doc();
420 
421  // Set the sizes of the input and output vectors
422  unsigned n_element = mesh_pt()->nelement();
423  Vector<unsigned> element_partition(n_element, 0);
424 
425  // Distribute and return partitioning
426  return distribute(element_partition, doc_info, report_stats);
427  }
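  // Illustrative sketch (hypothetical driver code): letting METIS choose
  // the partition and keeping the returned vector, e.g. for a later
  // restart. "MyProblem" is a hypothetical user Problem subclass.
  //
  //   MPI_Helpers::init(argc, argv);
  //   MyProblem problem;
  //   const bool report_stats = true;
  //   Vector<unsigned> used_partition = problem.distribute(report_stats);
  //   problem.newton_solve();
  //   MPI_Helpers::finalize();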
428 
429  //==================================================================
430  /// Distribute the problem according to specified partition.
431  /// If all entries in partitioning vector are zero we use METIS
432  /// to do the partitioning after all.
433  /// Returns actual partitioning used, e.g. for restart.
434  //==================================================================
436  const Vector<unsigned>& element_partition, const bool& report_stats)
437  {
438 #ifdef PARANOID
439  bool has_non_zero_entry = false;
440  unsigned n = element_partition.size();
441  for (unsigned i = 0; i < n; i++)
442  {
443  if (element_partition[i] != 0)
444  {
445  has_non_zero_entry = true;
446  break;
447  }
448  }
449  if (!has_non_zero_entry)
450  {
451  std::ostringstream warn_message;
452  warn_message << "WARNING: All entries in specified partitioning vector \n"
453  << " are zero -- will ignore this and use METIS\n"
454  << " to perform the partitioning\n";
456  warn_message.str(), "Problem::distribute()", OOMPH_EXCEPTION_LOCATION);
457  }
458 #endif
459  // Set dummy doc parameters
460  DocInfo doc_info;
461  doc_info.disable_doc();
462 
463  // Distribute and return partitioning
464  return distribute(element_partition, doc_info, report_stats);
465  }
466 
467  //==================================================================
468  /// Distribute the problem and doc to specified DocInfo.
469  /// Returns actual partitioning used, e.g. for restart.
470  //==================================================================
472  const bool& report_stats)
473  {
474  // Set the sizes of the input and output vectors
475  unsigned n_element = mesh_pt()->nelement();
476 
477  // Dummy input vector
478  Vector<unsigned> element_partition(n_element, 0);
479 
480  // Distribute and return partitioning
481  return distribute(element_partition, doc_info, report_stats);
482  }
483 
484  //==================================================================
485  /// Distribute the problem according to specified partition.
486  /// (If all entries in partitioning vector are zero we use METIS
487  /// to do the partitioning after all) and doc.
488  /// Returns actual partitioning used, e.g. for restart.
489  //==================================================================
491  const Vector<unsigned>& element_partition,
492  DocInfo& doc_info,
493  const bool& report_stats)
494  {
495  // Storage for number of processors and number of elements in global mesh
496  int n_proc = this->communicator_pt()->nproc();
497  int my_rank = this->communicator_pt()->my_rank();
498  int n_element = mesh_pt()->nelement();
499 
500  // Vector to be returned
501  Vector<unsigned> return_element_domain;
502 
503  // Buffer extreme cases
504  if (n_proc == 1) // single-process job - don't do anything
505  {
506  if (report_stats)
507  {
508  std::ostringstream warn_message;
509  warn_message << "WARNING: You've tried to distribute a problem over\n"
510  << "only one processor: this would make METIS crash.\n"
511  << "Ignoring your request for distribution.\n";
512  OomphLibWarning(warn_message.str(),
513  "Problem::distribute()",
514  OOMPH_EXCEPTION_LOCATION);
515  }
516  }
517  else if (n_proc > n_element) // more processors than elements
518  {
519  // Throw an error
520  std::ostringstream error_stream;
521  error_stream << "You have tried to distribute a problem\n"
522  << "but there are less elements than processors.\n"
523  << "Please re-run with more elements!\n"
524  << "Please also ensure that actions_before_distribute().\n"
525  << "and actions_after_distribute() are correctly set up.\n"
526  << std::endl;
527  throw OomphLibError(
528  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
529  }
530  else
531  {
532  // We only distribute uniformly-refined meshes; buffer the case where
533  // any mesh is not uniformly refined
534  bool a_mesh_is_not_uniformly_refined = false;
535  unsigned n_mesh = nsub_mesh();
536  if (n_mesh == 0)
537  {
538  // Check refinement levels
539  if (TreeBasedRefineableMeshBase* mmesh_pt =
540  dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
541  {
542  unsigned min_ref_level = 0;
543  unsigned max_ref_level = 0;
544  mmesh_pt->get_refinement_levels(min_ref_level, max_ref_level);
545  // If they are not the same
546  if (max_ref_level != min_ref_level)
547  {
548  a_mesh_is_not_uniformly_refined = true;
549  }
550  }
551  }
552  else
553  {
554  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
555  {
556  // Check refinement levels for each mesh individually
557  // (one mesh is allowed to be "more uniformly refined" than another)
558  if (TreeBasedRefineableMeshBase* mmesh_pt =
559  dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
560  {
561  unsigned min_ref_level = 0;
562  unsigned max_ref_level = 0;
563  mmesh_pt->get_refinement_levels(min_ref_level, max_ref_level);
564  // If they are not the same
565  if (max_ref_level != min_ref_level)
566  {
567  a_mesh_is_not_uniformly_refined = true;
568  }
569  }
570  }
571  }
572 
573  // If any mesh is not uniformly refined
574  if (a_mesh_is_not_uniformly_refined)
575  {
576  // Again it may make more sense to throw an error here as the user
577  // will probably not be running a problem that is small enough to
578  // fit in its entirety on each processor
579  std::ostringstream error_stream;
580  error_stream << "You have tried to distribute a problem\n"
581  << "but at least one of your meshes is no longer\n"
582  << "uniformly refined. In order to preserve the Tree\n"
583  << "and TreeForest structure, Problem::distribute() can\n"
584  << "only be called while meshes are uniformly refined.\n"
585  << std::endl;
586  throw OomphLibError(
587  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
588  }
589  else
590  {
591  // Is there any global data? If so, distributing the problem won't work
592  if (nglobal_data() > 0)
593  {
594  std::ostringstream error_stream;
595  error_stream << "You have tried to distribute a problem\n"
596  << "and there is some global data.\n"
597  << "This is not likely to work...\n"
598  << std::endl;
599  throw OomphLibError(error_stream.str(),
600  OOMPH_CURRENT_FUNCTION,
601  OOMPH_EXCEPTION_LOCATION);
602  }
603 
604  double t_start = 0;
606  {
607  t_start = TimingHelpers::timer();
608  }
609 
610 
611 #ifdef PARANOID
612  unsigned old_ndof = ndof();
613 #endif
614 
615  // Need to partition the global mesh before distributing
616  Mesh* global_mesh_pt = mesh_pt();
617 
618  // Vector listing the affiliation of each element
619  unsigned nelem = global_mesh_pt->nelement();
620  Vector<unsigned> element_domain(nelem);
621 
622  // Number of elements that I'm in charge of, based on any
623  // incoming partitioning
624  unsigned n_my_elements = 0;
625 
626  // Have we used the pre-set partitioning
627  bool used_preset_partitioning = false;
628 
629  // Partition the mesh, unless the partition has already been passed in
630  // If it hasn't then the sum of all the entries of the vector should be
631  // 0
632  unsigned sum_element_partition = 0;
633  unsigned n_part = element_partition.size();
634  for (unsigned e = 0; e < n_part; e++)
635  {
636  // ... another one for me.
637  if (int(element_partition[e]) == my_rank) n_my_elements++;
638 
639  sum_element_partition += element_partition[e];
640  }
641  if (sum_element_partition == 0)
642  {
643  oomph_info << "INFO: using METIS to partition elements" << std::endl;
644  partition_global_mesh(global_mesh_pt, doc_info, element_domain);
645  used_preset_partitioning = false;
646  }
647  else
648  {
649  oomph_info << "INFO: using pre-set partition of elements"
650  << std::endl;
651  used_preset_partitioning = true;
652  element_domain = element_partition;
653  }
654 
655  // Set the GLOBAL Mesh as being distributed
656  global_mesh_pt->set_communicator_pt(this->communicator_pt());
657 
658  double t_end = 0.0;
660  {
661  t_end = TimingHelpers::timer();
662  oomph_info << "Time for partitioning of global mesh: "
663  << t_end - t_start << std::endl;
664  t_start = TimingHelpers::timer();
665  }
666 
667  // Store how many elements we had in the various sub-meshes
668  // before actions_before_distribute() (which may empty some of
669  // them).
670  Vector<unsigned> n_element_in_old_submesh(n_mesh);
671  if (n_mesh != 0)
672  {
673  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
674  {
675  unsigned nsub_elem = mesh_pt(i_mesh)->nelement();
676  n_element_in_old_submesh[i_mesh] = nsub_elem;
677  }
678  }
679 
680  // Partitioning complete; call actions before distribute
682 
684  {
685  t_end = TimingHelpers::timer();
686  oomph_info << "Time for actions before distribute: "
687  << t_end - t_start << std::endl;
688  }
689 
690  // This next bit is cheap -- omit timing
691  // t_start = TimingHelpers::timer();
692 
693  // Number of submeshes (NB: some may have been deleted in
694  // actions_before_distribute())
695  n_mesh = nsub_mesh();
696 
697 
698  // Prepare vector of vectors for submesh element domains
699  Vector<Vector<unsigned>> submesh_element_domain(n_mesh);
700 
701  // The submeshes need to know their own element domains.
702  // Also if any meshes have been emptied we ignore their
703  // partitioning in the vector that we return from here
704  return_element_domain.reserve(element_domain.size());
705  if (n_mesh != 0)
706  {
707  unsigned count = 0;
708  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
709  {
710  unsigned nsub_elem = mesh_pt(i_mesh)->nelement();
711  submesh_element_domain[i_mesh].resize(nsub_elem);
712  unsigned nsub_elem_old = n_element_in_old_submesh[i_mesh];
713  for (unsigned e = 0; e < nsub_elem_old; e++)
714  {
715  if (nsub_elem_old == nsub_elem)
716  {
717  submesh_element_domain[i_mesh][e] = element_domain[count];
718  return_element_domain.push_back(element_domain[count]);
719  }
720  // return_element_domain.push_back(element_domain[count]);
721  count++;
722  }
723  }
724  }
725  else
726  {
727  return_element_domain = element_domain;
728  }
729 
731  {
732  t_start = TimingHelpers::timer();
733  }
734 
735  // Setup the map between "root" element and number in global mesh
736  // (currently used in the load_balance() routines)
737 
738  // This map is only established for structured meshes, so we
739  // need to check the type of mesh here
740  if (n_mesh == 0)
741  {
742  // Check whether the single mesh is a structured mesh
743  bool structured_mesh = true;
744  TriangleMeshBase* tri_mesh_pt =
745  dynamic_cast<TriangleMeshBase*>(mesh_pt(0));
746  if (tri_mesh_pt != 0)
747  {
748  structured_mesh = false;
749  } // if (tri_mesh_pt != 0)
750  if (structured_mesh)
751  {
752  const unsigned n_ele = global_mesh_pt->nelement();
753  Base_mesh_element_pt.resize(n_ele);
755  for (unsigned e = 0; e < n_ele; e++)
756  {
757  GeneralisedElement* el_pt = global_mesh_pt->element_pt(e);
759  Base_mesh_element_pt[e] = el_pt;
760  } // for (e<n_ele)
761  } // A TreeBaseMesh mesh
762  } // if (n_mesh==0)
763  else
764  {
765  // If we have submeshes then we only add those elements that
766  // belong to structured meshes, but first compute the total
767  // number of elements in the structured meshes
768  unsigned nglobal_element = 0;
769  // Store which submeshes are structured
770  std::vector<bool> is_structured_mesh(n_mesh);
771  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
772  {
773  TriangleMeshBase* tri_mesh_pt =
774  dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
775  if (tri_mesh_pt != 0)
776  {
777  // Set the flags to indicate this is not a structured
778  // mesh
779  is_structured_mesh[i_mesh] = false;
780  } // if (tri_mesh_pt != 0)
781  else
782  {
783  // Set the flags to indicate this is a structured
784  // mesh
785  is_structured_mesh[i_mesh] = true;
786  } // else if (tri_mesh_pt != 0)
787  // Check if the mesh is a structured mesh
788  if (is_structured_mesh[i_mesh])
789  {
790  nglobal_element += mesh_pt(i_mesh)->nelement();
791  } // A TreeBaseMesh mesh
792  } // for (i_mesh<n_mesh)
793 
794  // Once the number of elements has been computed, resize the
795  // structure
796  Base_mesh_element_pt.resize(nglobal_element);
798  unsigned counter = 0;
799  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
800  {
801  // Check if the mesh is a structured mesh
802  if (is_structured_mesh[i_mesh])
803  {
804  const unsigned n_ele = mesh_pt(i_mesh)->nelement();
805  for (unsigned e = 0; e < n_ele; e++)
806  {
807  GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
808  Base_mesh_element_number_plus_one[el_pt] = counter + 1;
809  Base_mesh_element_pt[counter] = el_pt;
810  // Increase the global element number
811  counter++;
812  } // for (e<n_ele)
813  } // A structured mesh
814  } // for (i_mesh<n_mesh)
815 
816 #ifdef PARANOID
817  if (counter != nglobal_element)
818  {
819  std::ostringstream error_stream;
820  error_stream
821  << "The number of global elements (" << nglobal_element
822  << ") is not the sameas the number of\nadded elements ("
823  << counter << ") to the Base_mesh_element_pt data "
824  << "structure!!!\n\n";
825  throw OomphLibError(error_stream.str(),
826  "Problem::distribute()",
827  OOMPH_EXCEPTION_LOCATION);
828  } // if (counter != nglobal_element)
829 #endif // #ifdef PARANOID
830 
831  } // else if (n_mesh==0)
832 
833  // Wipe everything if a pre-determined partitioning
834  // didn't specify ANY elements for this processor
835  // (typically happens during restarts with larger number
836  // of processors -- in this case we really want an empty
837  // processor rather than one with any "kept" halo elements)
838  bool overrule_keep_as_halo_element_status = false;
839  if ((n_my_elements == 0) && (used_preset_partitioning))
840  {
841  oomph_info << "INFO: We're over-ruling the \"keep as halo element\"\n"
842  << " status because the preset partitioning\n"
843  << " didn't place ANY elements on this processor,\n"
844  << " probably because of a restart on a larger \n"
845  << " number of processors\n";
846  overrule_keep_as_halo_element_status = true;
847  }
848 
849 
850  // Distribute the (sub)meshes (i.e. sort out their halo lookup schemes)
851  Vector<GeneralisedElement*> deleted_element_pt;
852  if (n_mesh == 0)
853  {
854  global_mesh_pt->distribute(this->communicator_pt(),
855  element_domain,
856  deleted_element_pt,
857  doc_info,
858  report_stats,
859  overrule_keep_as_halo_element_status);
860  }
861  else // There are submeshes, "distribute" each one separately
862  {
863  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
864  {
865  if (report_stats)
866  {
867  oomph_info << "Distributing submesh " << i_mesh << std::endl
868  << "--------------------" << std::endl;
869  }
870  // Set the doc_info number to reflect the submesh
871  doc_info.number() = i_mesh;
872  mesh_pt(i_mesh)->distribute(this->communicator_pt(),
873  submesh_element_domain[i_mesh],
874  deleted_element_pt,
875  doc_info,
876  report_stats,
877  overrule_keep_as_halo_element_status);
878  }
879  // Rebuild the global mesh
881  }
882 
883  // Null out information associated with deleted elements
884  unsigned n_del = deleted_element_pt.size();
885  for (unsigned e = 0; e < n_del; e++)
886  {
887  GeneralisedElement* el_pt = deleted_element_pt[e];
888  unsigned old_el_number = Base_mesh_element_number_plus_one[el_pt] - 1;
890  Base_mesh_element_pt[old_el_number] = 0;
891  }
892 
894  {
895  t_end = TimingHelpers::timer();
896  oomph_info << "Time for mesh-level distribution: " << t_end - t_start
897  << std::endl;
898  t_start = TimingHelpers::timer();
899  }
900 
901  // Now the problem has been distributed
903 
904  // Call actions after distribute
906 
908  {
909  t_end = TimingHelpers::timer();
910  oomph_info << "Time for actions after distribute: " << t_end - t_start
911  << std::endl;
912  t_start = TimingHelpers::timer();
913  }
914 
915  // Re-assign the equation numbers (incl synchronisation if reqd)
916  unsigned n_dof = assign_eqn_numbers();
917  oomph_info << "Number of equations: " << n_dof << std::endl;
918 
920  {
921  t_end = TimingHelpers::timer();
922  oomph_info << "Time for re-assigning eqn numbers (in distribute): "
923  << t_end - t_start << std::endl;
924  }
925 
926 
927 #ifdef PARANOID
928  if (n_dof != old_ndof)
929  {
930  std::ostringstream error_stream;
931  error_stream
932  << "Number of dofs in distribute() has changed "
933  << "from " << old_ndof << " to " << n_dof << "\n"
934  << "Check that you've implemented any necessary "
935  "actions_before/after\n"
936  << "distribute functions, e.g. to pin redundant pressure dofs"
937  << " etc.\n";
938  throw OomphLibError(error_stream.str(),
939  OOMPH_CURRENT_FUNCTION,
940  OOMPH_EXCEPTION_LOCATION);
941  }
942 #endif
943 
944  } // end if to check for uniformly refined mesh(es)
945 
946  } // end if to check number of processors vs. number of elements etc.
947 
948 
949  // Force re-analysis of time spent on assembly of each
950  // elemental Jacobian
952  Elemental_assembly_time.clear();
953 
954  // Return the partition vector used in the distribution
955  return return_element_domain;
956  }
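  // Illustrative sketch (hypothetical driver code): re-using a previously
  // stored partition on restart via the overload that takes a pre-set
  // partitioning vector (an all-zero vector would fall back to METIS).
  //
  //   Vector<unsigned> stored_partition;   // filled from a restart file
  //   // ... read stored_partition ...
  //   const bool report_stats = true;
  //   problem.distribute(stored_partition, report_stats);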
957 
958  //==================================================================
959  /// Partition the global mesh, return vector specifying the processor
960  /// number for each element. Virtual so that it can be overloaded by
961  /// any user; the default is to use METIS to perform the partitioning
962  /// (with a bit of cleaning up afterwards to sort out "special cases").
963  //==================================================================
964  void Problem::partition_global_mesh(Mesh*& global_mesh_pt,
965  DocInfo& doc_info,
966  Vector<unsigned>& element_domain,
967  const bool& report_stats)
968  {
969  // Storage for number of processors and current processor
970  int n_proc = this->communicator_pt()->nproc();
971  int rank = this->communicator_pt()->my_rank();
972 
973  std::ostringstream filename;
974  std::ofstream some_file;
975 
976  // Doc the original mesh on proc 0
977  //--------------------------------
978  if (doc_info.is_doc_enabled())
979  {
980  if (rank == 0)
981  {
982  filename << doc_info.directory() << "/complete_mesh"
983  << doc_info.number() << ".dat";
984  global_mesh_pt->output(filename.str().c_str(), 5);
985  }
986  }
987 
988  // Partition the mesh
989  //-------------------
990  // METIS Objective (0: minimise edge cut; 1: minimise total comm volume)
991  unsigned objective = 0;
992 
993  // Do the partitioning
994  unsigned nelem = 0;
995  if (this->communicator_pt()->my_rank() == 0)
996  {
997  METIS::partition_mesh(this, n_proc, objective, element_domain);
998  nelem = element_domain.size();
999  }
1000  MPI_Bcast(&nelem, 1, MPI_UNSIGNED, 0, this->communicator_pt()->mpi_comm());
1001  element_domain.resize(nelem);
1002  MPI_Bcast(&element_domain[0],
1003  nelem,
1004  MPI_UNSIGNED,
1005  0,
1006  this->communicator_pt()->mpi_comm());
1007 
1008  // On very coarse meshes with larger numbers of processors, METIS
1009  // occasionally returns an element_domain Vector for which a particular
1010  // processor has no elements affiliated to it; the following fixes this
1011 
1012  // Convert element_domain to integer storage
1013  Vector<int> int_element_domain(nelem);
1014  for (unsigned e = 0; e < nelem; e++)
1015  {
1016  int_element_domain[e] = element_domain[e];
1017  }
1018 
1019  // Global storage for number of elements on each process
1020  int my_number_of_elements = 0;
1021  Vector<int> number_of_elements(n_proc, 0);
1022 
1023  for (unsigned e = 0; e < nelem; e++)
1024  {
1025  if (int_element_domain[e] == rank)
1026  {
1027  my_number_of_elements++;
1028  }
1029  }
1030 
1031  // Communicate the correct value for each single process into
1032  // the global storage vector
1033  MPI_Allgather(&my_number_of_elements,
1034  1,
1035  MPI_INT,
1036  &number_of_elements[0],
1037  1,
1038  MPI_INT,
1039  this->communicator_pt()->mpi_comm());
1040 
1041  // If a process has no elements then switch an element with the
1042  // process with the largest number of elements, assuming
1043  // that it still has enough elements left to share
1044  int max_number_of_elements = 0;
1045  int process_with_max_elements = 0;
1046  for (int d = 0; d < n_proc; d++)
1047  {
1048  if (number_of_elements[d] == 0)
1049  {
1050  // Find the process with maximum number of elements
1051  if (max_number_of_elements <= 1)
1052  {
1053  for (int dd = 0; dd < n_proc; dd++)
1054  {
1055  if (number_of_elements[dd] > max_number_of_elements)
1056  {
1057  max_number_of_elements = number_of_elements[dd];
1058  process_with_max_elements = dd;
1059  }
1060  }
1061  }
1062 
1063  // Check that this number of elements is okay for sharing
1064  if (max_number_of_elements <= 1)
1065  {
1066  // Throw an error if elements can't be shared
1067  std::ostringstream error_stream;
1068  error_stream << "No process has more than 1 element, and\n"
1069  << "at least one process has no elements!\n"
1070  << "Suggest rerunning with more refinement.\n"
1071  << std::endl;
1072  throw OomphLibError(error_stream.str(),
1073  OOMPH_CURRENT_FUNCTION,
1074  OOMPH_EXCEPTION_LOCATION);
1075  }
1076 
1077  // Loop over the element domain vector and switch
1078  // one value for process "process_with_max_elements" with d
1079  for (unsigned e = 0; e < nelem; e++)
1080  {
1081  if (int_element_domain[e] == process_with_max_elements)
1082  {
1083  int_element_domain[e] = d;
1084  // Change the numbers associated with these processes
1085  number_of_elements[d]++;
1086  number_of_elements[process_with_max_elements]--;
1087  // Reduce the number of elements available on "max" process
1088  max_number_of_elements--;
1089  // Inform the user that a switch has taken place
1090  if (report_stats)
1091  {
1092  oomph_info << "INFO: Switched element domain at position " << e
1093  << std::endl
1094  << "from process " << process_with_max_elements
1095  << " to process " << d << std::endl
1096  << "which was given no elements by METIS partition"
1097  << std::endl;
1098  }
1099  // Only need to do this once for this element loop, otherwise
1100  // this will take all the elements from "max" process and put them
1101  // in process d, thus leaving essentially the same problem!
1102  break;
1103  }
1104  }
1105  }
1106  }
1107 
1108  // Reassign new values to the element_domain vector
1109  for (unsigned e = 0; e < nelem; e++)
1110  {
1111  element_domain[e] = int_element_domain[e];
1112  }
1113 
1114  unsigned count_elements = 0;
1115  for (unsigned e = 0; e < nelem; e++)
1116  {
1117  if (int(element_domain[e]) == rank)
1118  {
1119  count_elements++;
1120  }
1121  }
1122 
1123  if (report_stats)
1124  {
1125  oomph_info << "I have " << count_elements
1126  << " elements from this partition" << std::endl
1127  << std::endl;
1128  }
1129  }
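  // Illustrative sketch (hypothetical user code): since this function is
  // virtual it can be overloaded, e.g. to bypass METIS with a trivial
  // round-robin assignment of elements to processors. "MyProblem" is a
  // hypothetical user Problem subclass.
  //
  //   void MyProblem::partition_global_mesh(Mesh*& global_mesh_pt,
  //                                         DocInfo& doc_info,
  //                                         Vector<unsigned>& element_domain,
  //                                         const bool& report_stats)
  //   {
  //    const unsigned nelem = global_mesh_pt->nelement();
  //    const unsigned nproc = this->communicator_pt()->nproc();
  //    element_domain.resize(nelem);
  //    for (unsigned e = 0; e < nelem; e++)
  //    {
  //     element_domain[e] = e % nproc;   // crude, for illustration only
  //    }
  //   }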
1130 
1131  //==================================================================
1132  /// (Irreversibly) prune halo(ed) elements and nodes, usually
1133  /// after another round of refinement, to get rid of
1134  /// excessively wide halo layers. Note that the current
1135  /// mesh will now be regarded as the base mesh and no unrefinement
1136  /// relative to it will be possible once this function
1137  /// has been called.
1138  //==================================================================
1140  const bool& report_stats)
1141  {
1142  // Storage for number of processors and current processor
1143  int n_proc = this->communicator_pt()->nproc();
1144 
1145  // Has the problem been distributed yet?
1147  {
1148  oomph_info
1149  << "WARNING: Problem::prune_halo_elements_and_nodes() was called on a "
1150  << "non-distributed Problem!" << std::endl;
1151  oomph_info << "Ignoring your request..." << std::endl;
1152  }
1153  else
1154  {
1155  // There are no halo layers to prune if it's a single-process job
1156  if (n_proc == 1)
1157  {
1158  oomph_info
1159  << "WARNING: You've tried to prune halo layers on a problem\n"
1160  << "with only one processor: this is unnecessary.\n"
1161  << "Ignoring your request." << std::endl
1162  << std::endl;
1163  }
1164  else
1165  {
1166 #ifdef PARANOID
1167  unsigned old_ndof = ndof();
1168 #endif
1169 
1170  double t_start = 0.0;
1172  {
1173  t_start = TimingHelpers::timer();
1174  }
1175 
1176  // Call actions before distribute
1178 
1179  double t_end = 0.0;
1181  {
1182  t_end = TimingHelpers::timer();
1183  oomph_info << "Time for actions_before_distribute() in "
1184  << "Problem::prune_halo_elements_and_nodes(): "
1185  << t_end - t_start << std::endl;
1186  t_start = TimingHelpers::timer();
1187  }
1188 
1189  // Associate all elements with root in current Base mesh
1190  unsigned nel = Base_mesh_element_pt.size();
1191  std::map<GeneralisedElement*, unsigned>
1192  old_base_element_number_plus_one;
1193  std::vector<bool> old_root_is_halo_or_non_existent(nel, true);
1194  for (unsigned e = 0; e < nel; e++)
1195  {
1196  // Get the base element
1197  GeneralisedElement* base_el_pt = Base_mesh_element_pt[e];
1198 
1199  // Does it exist locally?
1200  if (base_el_pt != 0)
1201  {
1202  // Check if it's a halo element
1203  if (!base_el_pt->is_halo())
1204  {
1205  old_root_is_halo_or_non_existent[e] = false;
1206  }
1207 
1208  // Not refineable: It's only the element itself
1209  RefineableElement* ref_el_pt = 0;
1210  ref_el_pt = dynamic_cast<RefineableElement*>(base_el_pt);
1211  if (ref_el_pt == 0)
1212  {
1213  old_base_element_number_plus_one[base_el_pt] = e + 1;
1214  }
1215  // Refineable: Get entire tree of elements
1216  else
1217  {
1218  Vector<Tree*> tree_pt;
1219  ref_el_pt->tree_pt()->stick_all_tree_nodes_into_vector(tree_pt);
1220  unsigned ntree = tree_pt.size();
1221  for (unsigned t = 0; t < ntree; t++)
1222  {
1223  old_base_element_number_plus_one[tree_pt[t]->object_pt()] =
1224  e + 1;
1225  }
1226  }
1227  }
1228  }
1229 
1230 
1232  {
1233  t_end = TimingHelpers::timer();
1234  oomph_info << "Time for setup old root elements in "
1235  << "Problem::prune_halo_elements_and_nodes(): "
1236  << t_end - t_start << std::endl;
1237  t_start = TimingHelpers::timer();
1238  }
1239 
1240 
1241  // Now remember the old number of base elements
1242  unsigned nel_base_old = nel;
1243 
1244 
1245  // Prune the halo elements and nodes of the mesh(es)
1246  Vector<GeneralisedElement*> deleted_element_pt;
1247  unsigned n_mesh = nsub_mesh();
1248  if (n_mesh == 0)
1249  {
1250  // Prune halo elements and nodes for the (single) global mesh
1252  deleted_element_pt, doc_info, report_stats);
1253  }
1254  else
1255  {
1256  // Loop over individual submeshes and prune separately
1257  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
1258  {
1260  deleted_element_pt, doc_info, report_stats);
1261  }
1262 
1263  // Rebuild the global mesh
1265  }
1266 
1268  {
1269  t_end = TimingHelpers::timer();
1270  oomph_info << "Total time for all mesh-level prunes in "
1271  << "Problem::prune_halo_elements_and_nodes(): "
1272  << t_end - t_start << std::endl;
1273  t_start = TimingHelpers::timer();
1274  }
1275 
1276  // Loop over all elements in newly rebuilt mesh (which contains
1277  // all elements in "tree order"), find the roots
1278  // (which are either non-refineable elements or refineable elements
1279  // whose tree representations are TreeRoots)
1280  std::map<FiniteElement*, bool> root_el_done;
1281 
1282  // Vector storing vectors of pointers to new base elements associated
1283  // with the same old base element
1285  new_base_element_associated_with_old_base_element(nel_base_old);
1286 
1287  unsigned n_meshes = n_mesh;
1288  // Change the value for the number of submeshes if there is only
1289  // one mesh so that the loop below works if we have only one
1290  // mesh
1291  if (n_meshes == 0)
1292  {
1293  n_meshes = 1;
1294  }
1295 
1296  // Store which submeshes (if there are any) are structured
1297  // meshes
1298  std::vector<bool> is_structured_mesh(n_meshes);
1299 
1300  // Loop over all elements in the rebuilt mesh, but make sure
1301  // that we are only looping over the structured meshes
1302  nel = 0;
1303  for (unsigned i_mesh = 0; i_mesh < n_meshes; i_mesh++)
1304  {
1305  TriangleMeshBase* tri_mesh_pt =
1306  dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
1307  if (!(tri_mesh_pt != 0))
1308  {
1309  // Mark the mesh as a structured mesh
1310  is_structured_mesh[i_mesh] = true;
1311  // Add the number of elements
1312  nel += mesh_pt(i_mesh)->nelement();
1313  } // if (!(tri_mesh_pt!=0))
1314  else
1315  {
1316  // Mark the mesh as a non-structured mesh
1317  is_structured_mesh[i_mesh] = false;
1318  } // else if (!(tri_mesh_pt!=0))
1319  } // for (i_mesh < n_mesh)
1320 
1321  // Go for all the meshes (if there are submeshes)
1322  for (unsigned i_mesh = 0; i_mesh < n_meshes; i_mesh++)
1323  {
1324  // Only work with the elements in the mesh if it is a
1325  // structured mesh
1326  if (is_structured_mesh[i_mesh])
1327  {
1328  // Get the number of elements in the submesh
1329  const unsigned nele_submesh = mesh_pt(i_mesh)->nelement();
1330  for (unsigned e = 0; e < nele_submesh; e++)
1331  {
1332  // Get the element
1333  GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
1334 
1335  // Not refineable: It's definitely a new base element
1336  RefineableElement* ref_el_pt = 0;
1337  ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
1338  if (ref_el_pt == 0)
1339  {
1340  unsigned old_base_el_no =
1341  old_base_element_number_plus_one[el_pt] - 1;
1342  new_base_element_associated_with_old_base_element
1343  [old_base_el_no]
1344  .push_back(el_pt);
1345  }
1346  // Refineable
1347  else
1348  {
1349  // Is it a tree root (after pruning)? In that case it's
1350  // a new base element
1351  if (dynamic_cast<TreeRoot*>(ref_el_pt->tree_pt()))
1352  {
1353  unsigned old_base_el_no =
1354  old_base_element_number_plus_one[el_pt] - 1;
1355  new_base_element_associated_with_old_base_element
1356  [old_base_el_no]
1357  .push_back(el_pt);
1358  }
1359  else
1360  {
1361  // Get associated root element
1362  FiniteElement* root_el_pt =
1363  ref_el_pt->tree_pt()->root_pt()->object_pt();
1364 
1365  if (!root_el_done[root_el_pt])
1366  {
1367  root_el_done[root_el_pt] = true;
1368  unsigned old_base_el_no =
1369  old_base_element_number_plus_one[el_pt] - 1;
1370  new_base_element_associated_with_old_base_element
1371  [old_base_el_no]
1372  .push_back(root_el_pt);
1373  }
1374  }
1375  }
1376  } // for (e < nele_submesh)
1377  } // if (is_structured_mesh[i_mesh])
1378  } // for (i_mesh < n_mesh)
1379 
1380  // Create a vector that stores how many new root/base elements
1381  // got spawned from each old root/base element in the global mesh
1382  Vector<unsigned> local_n_new_root(nel_base_old);
1383 #ifdef PARANOID
1384  Vector<unsigned> n_new_root_back(nel_base_old);
1385 #endif
1386  for (unsigned e = 0; e < nel_base_old; e++)
1387  {
1388  local_n_new_root[e] =
1389  new_base_element_associated_with_old_base_element[e].size();
1390 
1391 #ifdef PARANOID
1392  // Backup so we can check that halo data was consistent
1393  n_new_root_back[e] = local_n_new_root[e];
1394 #endif
1395  }
1396 
1398  {
1399  t_end = TimingHelpers::timer();
1400  oomph_info << "Time for setup of new base elements in "
1401  << "Problem::prune_halo_elements_and_nodes(): "
1402  << t_end - t_start << std::endl;
1403  t_start = TimingHelpers::timer();
1404  }
1405 
1406  // Now do reduce operation to get information for all
1407  // old root/base elements -- the pruned (halo!) base elements contain
1408  // fewer associated new roots.
1409  Vector<unsigned> n_new_root(nel_base_old);
1410  MPI_Allreduce(&local_n_new_root[0],
1411  &n_new_root[0],
1412  nel_base_old,
1413  MPI_UNSIGNED,
1414  MPI_MAX,
1415  this->communicator_pt()->mpi_comm());
1416 
1417 
1419  {
1420  t_end = TimingHelpers::timer();
1421  oomph_info << "Time for allreduce in "
1422  << "Problem::prune_halo_elements_and_nodes(): "
1423  << t_end - t_start << std::endl;
1424  t_start = TimingHelpers::timer();
1425  }
1426 
1427  // Find out total number of base elements
1428  unsigned nel_base_new = 0;
1429  for (unsigned e = 0; e < nel_base_old; e++)
1430  {
1431  // Increment
1432  nel_base_new += n_new_root[e];
1433 
1434 #ifdef PARANOID
1435  // If we already had data for this root previously then
1436  // the data ought to be consistent afterwards (since taking
1437  // the max of consistent numbers shouldn't change things -- this
1438  // deals with halo/haloed elements)
1439  if (!old_root_is_halo_or_non_existent[e])
1440  {
1441  if (n_new_root_back[e] != 0)
1442  {
1443  if (n_new_root_back[e] != n_new_root[e])
1444  {
1445  std::ostringstream error_stream;
1446  error_stream
1447  << "Number of new root elements spawned from old root " << e
1448  << ": " << n_new_root[e] << "\nis not consistent"
1449  << " with previous value: " << n_new_root_back[e]
1450  << std::endl;
1451  throw OomphLibError(error_stream.str(),
1452  OOMPH_CURRENT_FUNCTION,
1453  OOMPH_EXCEPTION_LOCATION);
1454  }
1455  }
1456  }
1457 
1458 #endif
1459  }
1460 
1461  // Reset base_mesh information
1462  Base_mesh_element_pt.clear();
1463  Base_mesh_element_pt.resize(nel_base_new, 0);
1465 
1466  // Now enumerate the new base/root elements consistently
1467  unsigned count = 0;
1468  for (unsigned e = 0; e < nel_base_old; e++)
1469  {
1470  // Old root is non-halo: Just add the new roots into the
1471  // new lookup scheme consecutively
1472  if (!old_root_is_halo_or_non_existent[e])
1473  {
1474  // Loop over new root/base element
1475  unsigned n_new_root =
1476  new_base_element_associated_with_old_base_element[e].size();
1477  for (unsigned j = 0; j < n_new_root; j++)
1478  {
1479  // Store new root/base element
1480  GeneralisedElement* el_pt =
1481  new_base_element_associated_with_old_base_element[e][j];
1482  Base_mesh_element_pt[count] = el_pt;
1483  Base_mesh_element_number_plus_one[el_pt] = count + 1;
1484 
1485  // Bump counter
1486  count++;
1487  }
1488  }
1489  // Old root element is halo so skip insertion (i.e. leave
1490  // entries in lookup schemes nulled) but increase counter to
1491  // ensure consistency between processors
1492  else
1493  {
1494  unsigned nskip = n_new_root[e];
1495  count += nskip;
1496  }
1497  }
1498 
1499  // Re-setup the map between "root" element and number in global mesh
1500  // (used in the load_balance() routines)
1502 
1503 
1505  {
1506  t_end = TimingHelpers::timer();
1507  oomph_info << "Time for finishing off base mesh info "
1508  << "Problem::prune_halo_elements_and_nodes(): "
1509  << t_end - t_start << std::endl;
1510  t_start = TimingHelpers::timer();
1511  }
1512 
1513 
1514  // Call actions after distribute
1516 
1517 
1519  {
1520  t_end = TimingHelpers::timer();
1521  oomph_info << "Time for actions_after_distribute() "
1522  << "Problem::prune_halo_elements_and_nodes(): "
1523  << t_end - t_start << std::endl;
1524  t_start = TimingHelpers::timer();
1525  }
1526 
1527 
1528  // Re-assign the equation numbers (incl synchronisation if reqd)
1529 #ifdef PARANOID
1530  unsigned n_dof = assign_eqn_numbers();
1531 #else
1533 #endif
1534 
1535 
1537  {
1538  t_end = TimingHelpers::timer();
1539  oomph_info << "Time for assign_eqn_numbers() "
1540  << "Problem::prune_halo_elements_and_nodes(): "
1541  << t_end - t_start << std::endl;
1542  t_start = TimingHelpers::timer();
1543  }
1544 
1545 
1546 #ifdef PARANOID
1548  {
1549  if (n_dof != old_ndof)
1550  {
1551  std::ostringstream error_stream;
1552  error_stream
1553  << "Number of dofs in prune_halo_elements_and_nodes() has "
1554  "changed "
1555  << "from " << old_ndof << " to " << n_dof << "\n"
1556  << "Check that you've implemented any necessary "
1557  "actions_before/after"
1558  << "\nadapt/distribute functions, e.g. to pin redundant pressure"
1559  << " dofs etc.\n";
1560  throw OomphLibError(error_stream.str(),
1561  OOMPH_CURRENT_FUNCTION,
1562  OOMPH_EXCEPTION_LOCATION);
1563  }
1564  }
1565 #endif
1566  }
1567  }
1568  }
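  // Illustrative sketch (hypothetical driver code, assuming the usual
  // convenience overloads with default arguments declared in problem.h):
  // pruning is typically done after distribution and further refinement
  // to trim the halo layers, e.g.
  //
  //   problem.distribute(true);
  //   problem.refine_uniformly();
  //   problem.prune_halo_elements_and_nodes();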
1569 
1570 
1571 #endif
1572 
1573 
1574  //===================================================================
1575  /// Build a single (global) mesh from a number
1576  /// of submeshes which are passed as a vector of pointers to the
1577  /// submeshes. The ordering is not necessarily optimal.
1578  //==============================================================
1580  {
1581 #ifdef PARANOID
1582  // Has a global mesh already been built
1583  if (Mesh_pt != 0)
1584  {
1585  std::string error_message = "Problem::build_global_mesh() called,\n";
1586  error_message += " but a global mesh has already been built:\n";
1587  error_message += "Problem::Mesh_pt is not zero!\n";
1588 
1589  throw OomphLibError(
1590  error_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
1591  }
1592  // Check that there are submeshes
1593  if (Sub_mesh_pt.size() == 0)
1594  {
1595  std::string error_message = "Problem::build_global_mesh() called,\n";
1596  error_message += " but there are no submeshes:\n";
1597  error_message += "Problem::Sub_mesh_pt has no entries\n";
1598 
1599  throw OomphLibError(
1600  error_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
1601  }
1602 #endif
1603 
1604  // Create an empty mesh
1605  Mesh_pt = new Mesh();
1606 
1607  // Call the rebuild function to construct the mesh
1609  }
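  // Illustrative sketch (hypothetical user code): the usual pattern in a
  // problem constructor is to add each submesh and then build the global
  // mesh from them. "Bulk_mesh_pt" and "Surface_mesh_pt" are hypothetical
  // member pointers.
  //
  //   add_sub_mesh(Bulk_mesh_pt);
  //   add_sub_mesh(Surface_mesh_pt);
  //   build_global_mesh();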
1610 
1611  //====================================================================
1612  /// If one of the submeshes has changed (e.g. by
1613  /// mesh adaptation) we need to update the global mesh.
1614  /// \b Note: The nodes boundary information refers to the
1615  /// boundary numbers within the submesh!
1616  /// N.B. This is essentially the same function as the Mesh constructor
1617  /// that assembles a single global mesh from submeshes
1618  //=====================================================================
1620  {
1621  // Use the function in mesh to merge the submeshes into this one
1623  }
1624 
1625 
1626  //================================================================
1627  /// Add a timestepper to the problem. The function will automatically
1628  /// create or resize the Time object so that it contains the appropriate
1629  /// number of levels of storage.
1630  //================================================================
1631  void Problem::add_time_stepper_pt(TimeStepper* const& time_stepper_pt)
1632  {
1633  // Add the timestepper to the vector
1634  Time_stepper_pt.push_back(time_stepper_pt);
1635 
1636  // Find the number of timesteps required by the timestepper
1637  unsigned ndt = time_stepper_pt->ndt();
1638 
1639  // If time has not been allocated, create time object with the
1640  // required number of time steps
1641  if (Time_pt == 0)
1642  {
1643  Time_pt = new Time(ndt);
1644  oomph_info << "Created Time with " << ndt << " timesteps" << std::endl;
1645  }
1646  else
1647  {
1648  // If the required number of time steps is greater than currently stored
1649  // resize the time storage
1650  if (ndt > Time_pt->ndt())
1651  {
1652  Time_pt->resize(ndt);
1653  oomph_info << "Resized Time to include " << ndt << " timesteps"
1654  << std::endl;
1655  }
1656  // Otherwise report that we are OK
1657  else
1658  {
1659  oomph_info << "Time object already has storage for " << ndt
1660  << " timesteps" << std::endl;
1661  }
1662  }
1663 
1664  // Pass the pointer to time to the timestepper
1666  }
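  // Illustrative sketch (hypothetical user code): a timestepper is usually
  // registered in the problem constructor before the mesh is built, e.g.
  //
  //   // Create and register a second-order BDF timestepper; the Time
  //   // object is created (or resized) automatically as described above
  //   add_time_stepper_pt(new BDF<2>);
  //   // "SomeMesh"/"Bulk_mesh_pt" are hypothetical placeholders
  //   Bulk_mesh_pt = new SomeMesh<ELEMENT>(..., time_stepper_pt());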
1667 
1668  //================================================================
1669  /// Set the explicit time stepper for the problem and also
1670  /// ensure that a time object has been created.
1671  //================================================================
1673  ExplicitTimeStepper* const& explicit_time_stepper_pt)
1674  {
1675  // Set the explicit time stepper
1677 
1678  // If time has not been allocated, create time object with the
1679  // required number of time steps
1680  if (Time_pt == 0)
1681  {
1682  Time_pt = new Time(0);
1683  oomph_info << "Created Time with storage for no previous timestep"
1684  << std::endl;
1685  }
1686  else
1687  {
1688  oomph_info << "Time object already exists " << std::endl;
1689  }
1690  }
1691 
1692 
1693 #ifdef OOMPH_HAS_MPI
1694 
1695  //================================================================
1696  /// Set default first and last elements for parallel assembly
1697  /// of non-distributed problem.
1698  //================================================================
1700  {
1702  {
1703  // Minimum number of elements per processor if there are fewer elements
1704  // than processors
1705  unsigned min_el = 10;
1706 
1707  // Resize and make default assignments
1708  int n_proc = this->communicator_pt()->nproc();
1709  unsigned n_elements = Mesh_pt->nelement();
1710  First_el_for_assembly.resize(n_proc, 0);
1711  Last_el_plus_one_for_assembly.resize(n_proc, 0);
1712 
1713  // In the absence of any better knowledge distribute work evenly
1714  // over elements
1715  unsigned range = 0;
1716  unsigned lo_proc = 0;
1717  unsigned hi_proc = n_proc - 1;
1718  if (int(n_elements) >= n_proc)
1719  {
1720  range = unsigned(double(n_elements) / double(n_proc));
1721  }
1722  else
1723  {
1724  range = min_el;
1725  lo_proc = 0;
1726  hi_proc = unsigned(double(n_elements) / double(min_el));
1727  }
1728 
1729  for (int p = lo_proc; p <= int(hi_proc); p++)
1730  {
1731  First_el_for_assembly[p] = p * range;
1732 
1733  unsigned last_el_plus_one = (p + 1) * range;
1734  if (last_el_plus_one > n_elements) last_el_plus_one = n_elements;
1735  Last_el_plus_one_for_assembly[p] = last_el_plus_one;
1736  }
1737 
1738  // Last one needs to incorporate any dangling elements
1739  if (int(n_elements) >= n_proc)
1740  {
1741  Last_el_plus_one_for_assembly[n_proc - 1] = n_elements;
1742  }
1743 
1744  // Doc
1745  if (n_proc > 1)
1746  {
1748  {
1749  oomph_info << "Problem is not distributed. Parallel assembly of "
1750  << "Jacobian uses default partitioning: " << std::endl;
1751  for (int p = 0; p < n_proc; p++)
1752  {
1753  if (Last_el_plus_one_for_assembly[p] != 0)
1754  {
1755  oomph_info << "Proc " << p << " assembles from element "
1756  << First_el_for_assembly[p] << " to "
1757  << Last_el_plus_one_for_assembly[p] - 1 << " \n";
1758  }
1759  else
1760  {
1761  oomph_info << "Proc " << p << " assembles no elements\n";
1762  }
1763  }
1764  }
1765  }
1766  }
1767  }
1768 
1769 
1770  //=======================================================================
1771  /// Helper function to re-assign the first and last elements to be
1772  /// assembled by each processor during parallel assembly for
1773  /// non-distributed problem.
1774  //=======================================================================
1776  {
1777  // Wait until all processes have completed/timed their assembly
1778  MPI_Barrier(this->communicator_pt()->mpi_comm());
1779 
1780  // Storage for number of processors and current processor
1781  int n_proc = this->communicator_pt()->nproc();
1782  int rank = this->communicator_pt()->my_rank();
1783 
1784  // Don't bother to update if we've got fewer elements than
1785  // processors
1786  unsigned nel = Elemental_assembly_time.size();
1787  if (int(nel) < n_proc)
1788  {
1789  oomph_info << "Not re-computing distribution of elemental assembly\n"
1790  << "because there are fewer elements than processors\n";
1791  return;
1792  }
1793 
1794  // Setup vectors storing the number of element timings to be sent
1795  // and the offset in the final vector
1796  Vector<int> receive_count(n_proc);
1797  Vector<int> displacement(n_proc);
1798  int offset = 0;
1799  for (int p = 0; p < n_proc; p++)
1800  {
1801  // Default distribution of labour
1802  unsigned el_lo = First_el_for_assembly[p];
1803  unsigned el_hi = Last_el_plus_one_for_assembly[p] - 1;
1804 
1805  // Number of timings to be sent and offset from start in
1806  // final vector
1807  receive_count[p] = el_hi - el_lo + 1;
1808  displacement[p] = offset;
1809  offset += el_hi - el_lo + 1;
1810  }
1811 
1812  // Make temporary c-style array to avoid over-writing in Gatherv below
1813  double* el_ass_time = new double[nel];
1814  for (unsigned e = 0; e < nel; e++)
1815  {
1816  el_ass_time[e] = Elemental_assembly_time[e];
1817  }
1818 
1819  // Gather timings on root processor
1820  unsigned nel_local =
1821  Last_el_plus_one_for_assembly[rank] - First_el_for_assembly[rank];
1822  MPI_Gatherv(&el_ass_time[First_el_for_assembly[rank]],
1823  nel_local,
1824  MPI_DOUBLE,
1825  &Elemental_assembly_time[0],
1826  &receive_count[0],
1827  &displacement[0],
1828  MPI_DOUBLE,
1829  0,
1830  this->communicator_pt()->mpi_comm());
1831  delete[] el_ass_time;
1832 
1833  // Vector of first and last elements for each processor
1834  Vector<Vector<int>> first_and_last_element(n_proc);
1835  for (int p = 0; p < n_proc; p++)
1836  {
1837  first_and_last_element[p].resize(2);
1838  }
1839 
1840  // Re-distribute work
1841  if (rank == 0)
1842  {
1843  if (Doc_imbalance_in_parallel_assembly)
1844  {
1845  oomph_info
1846  << std::endl
1847  << "Re-assigning distribution of element assembly over processors:"
1848  << std::endl;
1849  }
1850 
1851  // Get total assembly time
1852  double total = 0.0;
1853  unsigned n_elements = Mesh_pt->nelement();
1854  for (unsigned e = 0; e < n_elements; e++)
1855  {
1856  total += Elemental_assembly_time[e];
1857  }
1858 
1859  // Target load per processor
1860  double target_load = total / double(n_proc);
1861 
1862  // We're on the root processor: Always start with the first element
1863  int proc = 0;
1864  first_and_last_element[0][0] = 0;
1865 
1866  // Highest element we can help ourselves to if we want to leave
1867  // at least one element for all subsequent processors
1868  unsigned max_el_avail = n_elements - n_proc;
1869 
1870  // Initialise total work allocated
1871  total = 0.0;
1872  for (unsigned e = 0; e < n_elements; e++)
1873  {
1874  total += Elemental_assembly_time[e];
1875 
1876  // Once we have reached the target load or we've used up practically
1877  // all the elements...
1878  if ((total > target_load) || (e == max_el_avail))
1879  {
1880  // Last element for current processor
1881  first_and_last_element[proc][1] = e;
1882 
1883  // Provided that we are not on the last processor
1884  if (proc < (n_proc - 1))
1885  {
1886  // Set first element for next one
1887  first_and_last_element[proc + 1][0] = e + 1;
1888 
1889  // Move on to the next processor
1890  proc++;
1891  }
1892 
1893  // Can have one more...
1894  max_el_avail++;
1895 
1896  // Re-initialise the time
1897  total = 0.0;
1898  } // end of test for "total exceeds target"
1899  }
1900 
1901 
1902  // Last element for last processor
1903  first_and_last_element[n_proc - 1][1] = n_elements - 1;
1904 
1905 
1906  // The following block should probably be paranoidified away
1907  // but we've screwed the logic up so many times that I feel
1908  // it's safer to keep it...
1909  bool wrong = false;
1910  std::ostringstream error_stream;
1911  for (int p = 0; p < n_proc - 1; p++)
1912  {
1913  unsigned first_of_current = first_and_last_element[p][0];
1914  unsigned last_of_current = first_and_last_element[p][1];
1915  if (first_of_current > last_of_current)
1916  {
1917  wrong = true;
1918  error_stream << "Error: First/last element of proc " << p << ": "
1919  << first_of_current << " " << last_of_current
1920  << std::endl;
1921  }
1922  unsigned first_of_next = first_and_last_element[p + 1][0];
1923  if (first_of_next != (last_of_current + 1))
1924  {
1925  wrong = true;
1926  error_stream << "Error: First element of proc " << p + 1 << ": "
1927  << first_of_next << " and last element of proc " << p
1928  << ": " << last_of_current << std::endl;
1929  }
1930  }
1931  if (wrong)
1932  {
1933  throw OomphLibError(
1934  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
1935  }
1936 
1937 
1938  // THIS TIDY UP SHOULD NO LONGER BE REQUIRED AND CAN GO AT SOME POINT
1939 
1940  // //If we haven't got to the end of the processor list then
1941  // //need to shift things about slightly because the processors
1942  // //at the end will be empty.
1943  // //This can occur when you have very fast assembly times and the
1944  // //rounding errors mean that the targets are achieved before all
1945  // processors
1946  // //have been visited.
1947  // //Happens a lot when you massively oversubscribe the CPUs (which was
1948  // //only ever for testing!)
1949  // if (proc!=n_proc-1)
1950  // {
1951  // oomph_info
1952  // << "First pass did not allocate elements on every processor\n";
1953  // oomph_info <<
1954  // "Moving elements so that each processor has at least one\n";
1955 
1956  // //Work out number of empty processos
1957  // unsigned n_empty_processors = n_proc - proc + 1;
1958 
1959  // //Loop over the processors that do have elements
1960  // //and work out how many we need to steal elements from
1961  // unsigned n_element_on_processors=0;
1962  // do
1963  // {
1964  // //Step down the processors
1965  // --proc;
1966  // //Add the current processor to the number of empty processors
1967  // //because the elements have to be shared between processors
1968  // //including the one(s) on which they are currently stored.
1969  // ++n_empty_processors;
1970  // n_element_on_processors +=
1971  // (first_and_last_element[proc][1] -
1972  // first_and_last_element[proc][0] + 1);
1973  // }
1974  // while(n_element_on_processors < n_empty_processors);
1975 
1976  // //Should now be able to put one element on each processor
1977  // //Start from the end and do so
1978  // unsigned current_element = n_elements-1;
1979  // for(int p=n_proc-1;p>proc;p--)
1980  // {
1981  // first_and_last_element[p][1] = current_element;
1982  // first_and_last_element[p][0] = --current_element;
1983  // }
1984 
1985  // //Now for the last processor we touched, just adjust the final value
1986  // first_and_last_element[proc][1] = current_element;
1987  // }
1988  // //Otherwise just put the rest of the elements on the final
1989  // //processor
1990  // else
1991  // {
1992  // // Last one
1993  // first_and_last_element[n_proc-1][1]=n_elements-1;
1994  // }
1995 
1996 
1997  // END PRESUMED-TO-BE-UNNECESSARY BLOCK...
1998 
1999 
2000  // Now communicate the information
2001 
2002  // Set local information for this (root) processor
2003  First_el_for_assembly[0] = first_and_last_element[0][0];
2004  Last_el_plus_one_for_assembly[0] = first_and_last_element[0][1] + 1;
2005 
2006  if (Doc_imbalance_in_parallel_assembly)
2007  {
2008  oomph_info << "Processor " << 0 << " assembles Jacobians"
2009  << " from elements " << first_and_last_element[0][0]
2010  << " to " << first_and_last_element[0][1] << " "
2011  << std::endl;
2012  }
2013 
2014  // Only now can we send the information to the other processors
2015  for (int p = 1; p < n_proc; ++p)
2016  {
2017  MPI_Send(&first_and_last_element[p][0],
2018  2,
2019  MPI_INT,
2020  p,
2021  0,
2022  this->communicator_pt()->mpi_comm());
2023 
2024 
2025  if (Doc_imbalance_in_parallel_assembly)
2026  {
2027  oomph_info << "Processor " << p << " assembles Jacobians"
2028  << " from elements " << first_and_last_element[p][0]
2029  << " to " << first_and_last_element[p][1] << " "
2030  << std::endl;
2031  }
2032  }
2033  }
2034  // Receive first and last element from root on non-master processors
2035  else
2036  {
2037  Vector<int> aux(2);
2038  MPI_Status status;
2039  MPI_Recv(&aux[0],
2040  2,
2041  MPI_INT,
2042  0,
2043  0,
2044  this->communicator_pt()->mpi_comm(),
2045  &status);
2046  First_el_for_assembly[rank] = aux[0];
2047  Last_el_plus_one_for_assembly[rank] = aux[1] + 1;
2048  }
2049 
2050  // Wipe all others
2051  for (int p = 0; p < n_proc; p++)
2052  {
2053  if (p != rank)
2054  {
2055  First_el_for_assembly[p] = 0;
2056  Last_el_plus_one_for_assembly[p] = 0;
2057  }
2058  }
2059 
2060  // The equations assembled by this processor may have changed so
2061  // we must resize the sparse assemble with arrays previous allocation
2062  Sparse_assemble_with_arrays_previous_allocation.resize(0);
2063  }
2064 
2065 #endif
2066 
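As an aside (not part of problem.cc): the re-balancing above is a greedy sweep performed on the root processor — elements are handed to the current processor until its accumulated assembly time exceeds the average load, then the next processor takes over. A minimal serial sketch with invented per-element timings (no MPI):

#include <iostream>
#include <utility>
#include <vector>

int main()
{
  // Invented per-element assembly times (seconds)
  std::vector<double> t = {0.1, 0.1, 0.4, 0.1, 0.1, 0.1, 0.4, 0.1, 0.1, 0.1};
  const int n_proc = 3;
  const unsigned n_el = t.size();

  double total = 0.0;
  for (unsigned e = 0; e < n_el; e++) total += t[e];
  const double target_load = total / double(n_proc);

  // Greedy sweep over contiguous elements, as in the root-processor loop above
  std::vector<std::pair<unsigned, unsigned>> range(n_proc, std::make_pair(0u, 0u));
  int proc = 0;
  unsigned max_el_avail = n_el - n_proc; // leave one element per later processor
  double acc = 0.0;
  for (unsigned e = 0; e < n_el; e++)
  {
    acc += t[e];
    if ((acc > target_load) || (e == max_el_avail))
    {
      range[proc].second = e;
      if (proc < n_proc - 1)
      {
        range[proc + 1].first = e + 1;
        proc++;
      }
      max_el_avail++;
      acc = 0.0;
    }
  }
  range[n_proc - 1].second = n_el - 1;

  for (int p = 0; p < n_proc; p++)
  {
    std::cout << "Proc " << p << ": elements " << range[p].first << " to "
              << range[p].second << "\n";
  }
  return 0;
}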
2067  //================================================================
2068  /// Assign all equation numbers for problem: Deals with global
2069  /// data (= data that isn't attached to any elements) and then
2070  /// does the equation numbering for the elements. Bool argument
2071  /// can be set to false to ignore assigning local equation numbers
2072  /// (necessary in the parallel implementation of locate_zeta
2073  /// between multiple meshes).
2074  //================================================================
2075  unsigned long Problem::assign_eqn_numbers(
2076  const bool& assign_local_eqn_numbers)
2077  {
2078  // Check that the global mesh has been built
2079 #ifdef PARANOID
2080  if (Mesh_pt == 0)
2081  {
2082  std::ostringstream error_stream;
2083  error_stream << "Global mesh does not exist, so equation numbers cannot "
2084  "be assigned.\n";
2085  // Check for sub meshes
2086  if (nsub_mesh() == 0)
2087  {
2088  error_stream << "There aren't even any sub-meshes in the Problem.\n"
2089  << "You can set the global mesh directly by using\n"
2090  << "Problem::mesh_pt() = my_mesh_pt;\n"
2091  << "OR you can use Problem::add_sub_mesh(mesh_pt); "
2092  << "to add a sub mesh.\n";
2093  }
2094  else
2095  {
2096  error_stream << "There are " << nsub_mesh() << " sub-meshes.\n";
2097  }
2098  error_stream << "You need to call Problem::build_global_mesh() to create "
2099  "a global mesh\n"
2100  << "from the sub-meshes.\n\n";
2101 
2102  throw OomphLibError(
2103  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2104  }
2105 #endif
2106 
2107  // Number of submeshes
2108  unsigned n_sub_mesh = Sub_mesh_pt.size();
2109 
2110 #ifdef OOMPH_HAS_MPI
2111 
2112  // Storage for number of processors
2113  int n_proc = this->communicator_pt()->nproc();
2114 
2115 
2116  if (n_proc > 1)
2117  {
2118  // Force re-analysis of time spent on assembly of each
2119  // elemental Jacobian
2120  Must_recompute_load_balance_for_assembly = true;
2121  Elemental_assembly_time.clear();
2122  }
2123  else
2124  {
2125  Must_recompute_load_balance_for_assembly = false;
2126  }
2127 
2128  // Re-distribution of elements over processors during assembly
2129  // must be recomputed
2130  if (!Problem_has_been_distributed)
2131  {
2132  // Set default first and last elements for parallel assembly
2133  // of non-distributed problem.
2134  set_default_first_and_last_element_for_assembly();
2135  }
2136 
2137 #endif
2138 
2139 
2140  double t_start = 0.0;
2141  if (Global_timings::Doc_comprehensive_timings)
2142  {
2143  t_start = TimingHelpers::timer();
2144  }
2145 
2146  // Loop over all elements in the mesh and set up any additional
2147  // dependencies that they may have (e.g. storing the geometric
2148  // Data, i.e. Data that affects an element's shape in elements
2149  // with algebraic node-update functions).
2150  unsigned nel = Mesh_pt->nelement();
2151  for (unsigned e = 0; e < nel; e++)
2152  {
2153  Mesh_pt->element_pt(e)->complete_setup_of_dependencies();
2154  }
2155 
2156 #ifdef OOMPH_HAS_MPI
2157  // Complete setup of dependencies for external halo elements too
2158  unsigned n_mesh = this->nsub_mesh();
2159  for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
2160  {
2161  for (int iproc = 0; iproc < n_proc; iproc++)
2162  {
2163  unsigned n_ext_halo_el = mesh_pt(i_mesh)->nexternal_halo_element(iproc);
2164  for (unsigned e = 0; e < n_ext_halo_el; e++)
2165  {
2166  mesh_pt(i_mesh)
2167  ->external_halo_element_pt(iproc, e)
2168  ->complete_setup_of_dependencies();
2169  }
2170  }
2171  }
2172 #endif
2173 
2174 
2175  double t_end = 0.0;
2176  if (Global_timings::Doc_comprehensive_timings)
2177  {
2178  t_end = TimingHelpers::timer();
2179  oomph_info
2180  << "Time for complete setup of dependencies in assign_eqn_numbers: "
2181  << t_end - t_start << std::endl;
2182  }
2183 
2184 
2185  // Initialise number of dofs for reserve below
2186  unsigned n_dof = 0;
2187 
2188  // Potentially loop over remainder of routine, possibly re-visiting all
2189  // those parts that must be redone, following the removal of duplicate
2190  // external halo data.
2191  for (unsigned loop_count = 0; loop_count < 2; loop_count++)
2192  {
2193  //(Re)-set the dof pointer to zero length because entries are
2194  // pushed back onto it -- if it's not reset here then we get into
2195  // trouble during mesh refinement when we reassign all dofs
2196  Dof_pt.resize(0);
2197 
2198  // Reserve from previous allocation if we're going around again
2199  Dof_pt.reserve(n_dof);
2200 
2201  // Reset the equation number
2202  unsigned long equation_number = 0;
2203 
2204  // Now set equation numbers for the global Data
2205  unsigned Nglobal_data = nglobal_data();
2206  for (unsigned i = 0; i < Nglobal_data; i++)
2207  {
2208  Global_data_pt[i]->assign_eqn_numbers(equation_number, Dof_pt);
2209  }
2210 
2211  if (Global_timings::Doc_comprehensive_timings)
2212  {
2213  t_start = TimingHelpers::timer();
2214  }
2215 
2216  // Call assign equation numbers on the global mesh
2217  n_dof = Mesh_pt->assign_global_eqn_numbers(Dof_pt);
2218
2219  // Deal with the spine meshes additional numbering
2220  // If there is only one mesh
2221  if (n_sub_mesh == 0)
2222  {
2223  if (SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt))
2224  {
2225  n_dof = spine_mesh_pt->assign_global_spine_eqn_numbers(Dof_pt);
2226  }
2227  }
2228  // Otherwise loop over the sub meshes
2229  else
2230  {
2231  // Assign global equation numbers first
2232  for (unsigned i = 0; i < n_sub_mesh; i++)
2233  {
2234  if (SpineMesh* const spine_mesh_pt =
2235  dynamic_cast<SpineMesh*>(Sub_mesh_pt[i]))
2236  {
2237  n_dof = spine_mesh_pt->assign_global_spine_eqn_numbers(Dof_pt);
2238  }
2239  }
2240  }
2241 
2242  if (Global_timings::Doc_comprehensive_timings)
2243  {
2244  t_end = TimingHelpers::timer();
2245  oomph_info
2246  << "Time for assign_global_eqn_numbers in assign_eqn_numbers: "
2247  << t_end - t_start << std::endl;
2248  t_start = TimingHelpers::timer();
2249  }
2250 
2251 
2252 #ifdef OOMPH_HAS_MPI
2253 
2254  // reset previous allocation
2255  Parallel_sparse_assemble_previous_allocation = 0;
2256
2257  // Only synchronise if the problem has actually been
2258  // distributed.
2259  if (Problem_has_been_distributed)
2260  {
2261  // Synchronise the equation numbers and return the total
2262  // number of degrees of freedom in the overall problem
2263  // Do not assign local equation numbers -- we're doing this
2264  // below.
2265  n_dof = synchronise_eqn_numbers(false);
2266  }
2267  // ..else just setup the Dof_distribution_pt
2268  // NOTE: this is setup by synchronise_eqn_numbers(...)
2269  // if Problem_has_been_distributed
2270  else
2271 #endif
2272  {
2273  Dof_distribution_pt->build(Communicator_pt, n_dof, false);
2274  }
2275 
2276  if (Global_timings::Doc_comprehensive_timings)
2277  {
2278  t_end = TimingHelpers::timer();
2279  oomph_info << "Time for Problem::synchronise_eqn_numbers in "
2280  << "Problem::assign_eqn_numbers: " << t_end - t_start
2281  << std::endl;
2282  }
2283 
2284 
2285 #ifdef OOMPH_HAS_MPI
2286 
2287 
2288  // Now remove duplicate data in external halo elements
2289  if (Problem_has_been_distributed)
2290  {
2291  if (Global_timings::Doc_comprehensive_timings)
2292  {
2293  t_start = TimingHelpers::timer();
2294  }
2295 
2296  // Monitor if we've actually changed anything
2297  bool actually_removed_some_data = false;
2298 
2299  // Only do it once!
2300  if (loop_count == 0)
2301  {
2302  if (n_sub_mesh == 0)
2303  {
2304  remove_duplicate_data(Mesh_pt, actually_removed_some_data);
2305  }
2306  else
2307  {
2308  for (unsigned i = 0; i < n_sub_mesh; i++)
2309  {
2310  bool tmp_actually_removed_some_data = false;
2311  remove_duplicate_data(Sub_mesh_pt[i],
2312  tmp_actually_removed_some_data);
2313  if (tmp_actually_removed_some_data)
2314  actually_removed_some_data = true;
2315  }
2316  }
2317  }
2318 
2319 
2320  if (Global_timings::Doc_comprehensive_timings)
2321  {
2322  t_end = TimingHelpers::timer();
2323  std::stringstream tmp;
2324  tmp << "Time for calls to Problem::remove_duplicate_data in "
2325  << "Problem::assign_eqn_numbers: " << t_end - t_start
2326  << " ; have ";
2327  if (!actually_removed_some_data)
2328  {
2329  tmp << " not ";
2330  }
2331  tmp << " removed some/any data.\n";
2332  oomph_info << tmp.str();
2333  t_start = TimingHelpers::timer();
2334  }
2335 
2336  // Break out of the loop if we haven't done anything here.
2337  unsigned status = 0;
2338  if (actually_removed_some_data) status = 1;
2339 
2340  // Allreduce to check if anyone has removed any data
2341  unsigned overall_status = 0;
2342  MPI_Allreduce(&status,
2343  &overall_status,
2344  1,
2345  MPI_UNSIGNED,
2346  MPI_MAX,
2347  this->communicator_pt()->mpi_comm());
2348 
2349 
2350  if (Global_timings::Doc_comprehensive_timings)
2351  {
2352  t_end = TimingHelpers::timer();
2353  std::stringstream tmp;
2354  tmp
2355  << "Time for MPI_Allreduce after Problem::remove_duplicate_data in "
2356  << "Problem::assign_eqn_numbers: " << t_end - t_start << std::endl;
2357  oomph_info << tmp.str();
2358  t_start = TimingHelpers::timer();
2359  }
2360 
2361  // Bail out if we haven't done anything here
2362  if (overall_status != 1)
2363  {
2364  break;
2365  }
2366 
2367  // Big tidy up: Remove null pointers from halo/haloed node storage
2368  // for all meshes (this involves comms and therefore must be
2369  // performed outside loop over meshes so the all-to-all is only
2370  // done once)
2371  remove_null_pointers_from_external_halo_node_storage();
2372
2373  // Time it...
2374  if (Global_timings::Doc_comprehensive_timings)
2375  {
2376  double t_end = TimingHelpers::timer();
2377  oomph_info << "Total time for "
2378  << "Problem::remove_null_pointers_from_external_halo_node_"
2379  "storage(): "
2380  << t_end - t_start << std::endl;
2381  }
2382  }
2383  else
2384  {
2385  // Problem not distributed; no need for another loop
2386  break;
2387  }
2388 
2389 #else
2390 
2391  // Serial run: Again no need for a second loop
2392  break;
2393 
2394 #endif
2395 
2396  } // end of loop over fcts that need to be re-executed if
2397  // we've removed duplicate external data
2398 
2399 
2400  // Resize the sparse assemble with arrays previous allocation
2401  Sparse_assemble_with_arrays_previous_allocation.resize(0);
2402
2403 
2404  if (Global_timings::Doc_comprehensive_timings)
2405  {
2406  t_start = TimingHelpers::timer();
2407  }
2408 
2409  // Finally assign local equations
2410  if (assign_local_eqn_numbers)
2411  {
2412  if (n_sub_mesh == 0)
2413  {
2414  Mesh_pt->assign_local_eqn_numbers(Store_local_dof_pt_in_elements);
2415  }
2416  else
2417  {
2418  for (unsigned i = 0; i < n_sub_mesh; i++)
2419  {
2420  Sub_mesh_pt[i]->assign_local_eqn_numbers(
2421  Store_local_dof_pt_in_elements);
2422  }
2423  }
2424  }
2425 
2426  if (Global_timings::Doc_comprehensive_timings)
2427  {
2428  t_end = TimingHelpers::timer();
2429  oomph_info << "Total time for all Mesh::assign_local_eqn_numbers in "
2430  << "Problem::assign_eqn_numbers: " << t_end - t_start
2431  << std::endl;
2432  }
2433 
2434 
2435  // and return the total number of DOFs
2436  return n_dof;
2437  }
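As an aside (not part of problem.cc): in a typical driver code assign_eqn_numbers() is called once, at the end of the Problem constructor, after all sub-meshes have been added and combined. A hedged fragment in which MyProblem, Bulk_mesh_pt and Surface_mesh_pt are placeholder names:

// Hypothetical driver-code fragment
MyProblem::MyProblem()
{
  // Build and register the sub-meshes (placeholder pointers)
  add_sub_mesh(Bulk_mesh_pt);
  add_sub_mesh(Surface_mesh_pt);

  // Combine the sub-meshes into the Problem's global mesh
  build_global_mesh();

  // Assign the global (and local) equation numbers and report the dof count
  oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
}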
2438  //================================================================
2439  /// Function to describe the dofs in terms of the global
2440  /// equation number, i.e. what type of value (nodal value of
2441  /// a Node; value in a Data object; value of internal Data in an
2442  /// element; etc) is the unknown with a certain global equation number.
2443  /// Output stream defaults to oomph_info.
2444  //================================================================
2445  void Problem::describe_dofs(std::ostream& out) const
2446  {
2447  // Check that the global mesh has been built
2448 #ifdef PARANOID
2449  if (Mesh_pt == 0)
2450  {
2451  std::ostringstream error_stream;
2452  error_stream
2453  << "Global mesh does not exist, so equation numbers cannot be found.\n";
2454  // Check for sub meshes
2455  if (nsub_mesh() == 0)
2456  {
2457  error_stream << "There aren't even any sub-meshes in the Problem.\n"
2458  << "You can set the global mesh directly by using\n"
2459  << "Problem::mesh_pt() = my_mesh_pt;\n"
2460  << "OR you can use Problem::add_sub_mesh(mesh_pt); "
2461  << "to add a sub mesh.\n";
2462  }
2463  else
2464  {
2465  error_stream << "There are " << nsub_mesh() << " sub-meshes.\n";
2466  }
2467  error_stream << "You need to call Problem::build_global_mesh() to create "
2468  "a global mesh\n"
2469  << "from the sub-meshes.\n\n";
2470 
2471  throw OomphLibError(
2472  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2473  }
2474 #endif
2475 
2476  out
2477  << "Although this program will describe the degrees of freedom in the \n"
2478  << "problem, it will do so using the typedef for the elements. This is \n"
2479  << "not necessarily human readable, but there is a solution.\n"
2480  << "Pipe your program's output through c++filt, with the argument -t.\n"
2481  << "e.g. \"./two_d_multi_poisson | c++filt -t > ReadableOutput.txt\".\n "
2482  << "(Disregarding the quotes)\n\n\n";
2483 
2484  out << "Classifying Global Equation Numbers" << std::endl;
2485  out << std::string(80, '-') << std::endl;
2486 
2487  // Number of submeshes
2488  unsigned n_sub_mesh = Sub_mesh_pt.size();
2489 
2490  // Classify Global dofs
2491  unsigned Nglobal_data = nglobal_data();
2492  for (unsigned i = 0; i < Nglobal_data; i++)
2493  {
2494  std::stringstream conversion;
2495  conversion << " in Global Data " << i << ".";
2496  std::string in(conversion.str());
2497  Global_data_pt[i]->describe_dofs(out, in);
2498  }
2499 
2500  // Put string in limiting scope.
2501  {
2502  // Descend into assignment for mesh.
2503  std::string in(" in Problem's Only Mesh.");
2504  Mesh_pt->describe_dofs(out, in);
2505  }
2506 
2507  // Deal with the spine meshes additional numbering:
2508  // If there is only one mesh:
2509  if (n_sub_mesh == 0)
2510  {
2511  if (SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt))
2512  {
2513  std::string in(" in Problem's Only SpineMesh.");
2514  spine_mesh_pt->describe_spine_dofs(out, in);
2515  }
2516  }
2517  // Otherwise loop over the sub meshes
2518  else
2519  {
2520  // Assign global equation numbers first
2521  for (unsigned i = 0; i < n_sub_mesh; i++)
2522  {
2523  if (SpineMesh* const spine_mesh_pt =
2524  dynamic_cast<SpineMesh*>(Sub_mesh_pt[i]))
2525  {
2526  std::stringstream conversion;
2527  conversion << " in Sub-SpineMesh " << i << ".";
2528  std::string in(conversion.str());
2529  spine_mesh_pt->describe_spine_dofs(out, in);
2530  } // end if.
2531  } // end for.
2532  } // end else.
2533 
2534 
2535  out << std::string(80, '\\') << std::endl;
2536  out << std::string(80, '\\') << std::endl;
2537  out << std::string(80, '\\') << std::endl;
2538  out << "Classifying global eqn numbers in terms of elements." << std::endl;
2539  out << std::string(80, '-') << std::endl;
2540  out << "Eqns | Source" << std::endl;
2541  out << std::string(80, '-') << std::endl;
2542 
2543  if (n_sub_mesh == 0)
2544  {
2545  std::string in(" in Problem's Only Mesh.");
2546  Mesh_pt->describe_local_dofs(out, in);
2547  }
2548  else
2549  {
2550  for (unsigned i = 0; i < n_sub_mesh; i++)
2551  {
2552  std::stringstream conversion;
2553  conversion << " in Sub-Mesh " << i << ".";
2554  std::string in(conversion.str());
2555  Sub_mesh_pt[i]->describe_local_dofs(out, in);
2556  } // End for
2557  } // End else
2558  } // End problem::describe_dofs(...)
2559 
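As an aside (not part of problem.cc): a hedged usage fragment for the function above; my_problem, my_driver and dof_report.txt are placeholders:

// Hypothetical driver-code fragment (requires <fstream>)
std::ofstream dof_report("dof_report.txt");
my_problem.describe_dofs(dof_report); // or my_problem.describe_dofs(); to write to oomph_info
// Demangle the element typedefs when running the driver, as the banner suggests:
//   ./my_driver | c++filt -t > ReadableOutput.txt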
2560 
2561  //================================================================
2562  /// Get the vector of dofs, i.e. a vector containing the current
2563  /// values of all unknowns.
2564  //================================================================
2565  void Problem::get_dofs(DoubleVector& dofs) const
2566  {
2567  // Find number of dofs
2568  const unsigned long n_dof = ndof();
2569 
2570  // Resize the vector
2571  dofs.build(Dof_distribution_pt, 0.0);
2572 
2573  // Copy dofs into vector
2574  for (unsigned long l = 0; l < n_dof; l++)
2575  {
2576  dofs[l] = *Dof_pt[l];
2577  }
2578  }
2579 
2580  /// Get history values of dofs in a double vector.
2581  void Problem::get_dofs(const unsigned& t, DoubleVector& dofs) const
2582  {
2583 #ifdef PARANOID
2584  if (distributed())
2585  {
2586  throw OomphLibError("Not designed for distributed problems",
2587  OOMPH_EXCEPTION_LOCATION,
2588  OOMPH_CURRENT_FUNCTION);
2589  // might work, not sure
2590  }
2591 #endif
2592 
2593  // Resize the vector
2594  dofs.build(Dof_distribution_pt, 0.0);
2595 
2596  // First deal with global data
2597  unsigned Nglobal_data = nglobal_data();
2598  for (unsigned i = 0; i < Nglobal_data; i++)
2599  {
2600  for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
2601  {
2602  // For each data get the equation number and copy out the value.
2603  int eqn_number = Global_data_pt[i]->eqn_number(j);
2604  if (eqn_number >= 0)
2605  {
2606  dofs[eqn_number] = Global_data_pt[i]->value(t, j);
2607  }
2608  }
2609  }
2610 
2611  // Next element internal data
2612  for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
2613  {
2614  GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
2615  for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
2616  {
2617  Data* d_pt = ele_pt->internal_data_pt(j);
2618  for (unsigned k = 0, nk = d_pt->nvalue(); k < nk; k++)
2619  {
2620  int eqn_number = d_pt->eqn_number(k);
2621  if (eqn_number >= 0)
2622  {
2623  dofs[eqn_number] = d_pt->value(t, k);
2624  }
2625  }
2626  }
2627  }
2628 
2629  // Now the nodes
2630  for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
2631  {
2632  Node* node_pt = mesh_pt()->node_pt(i);
2633  for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
2634  {
2635  // For each node get the equation number and copy out the value.
2636  int eqn_number = node_pt->eqn_number(j);
2637  if (eqn_number >= 0)
2638  {
2639  dofs[eqn_number] = node_pt->value(t, j);
2640  }
2641  }
2642  }
2643  }
2644 
2645 
2646 #ifdef OOMPH_HAS_MPI
2647 
2648  //=======================================================================
2649  /// Private helper function to remove repeated data
2650  /// in external haloed elements associated with specified mesh.
2651  /// Bool is true if some data was removed -- this usually requires
2652  /// re-running through certain parts of the equation numbering procedure.
2653  //======================================================================
2654  void Problem::remove_duplicate_data(Mesh* const& mesh_pt,
2655  bool& actually_removed_some_data)
2656  {
2657  // // // Taken out again by MH -- clutters up output
2658  // // Doc timings if required
2659  // double t_start=0.0;
2660  // if (Global_timings::Doc_comprehensive_timings)
2661  // {
2662  // t_start=TimingHelpers::timer();
2663  // }
2664 
2665  int n_proc = this->communicator_pt()->nproc();
2666  int my_rank = this->communicator_pt()->my_rank();
2667 
2668  // Initialise
2669  actually_removed_some_data = false;
2670 
2671  // Each individual container of external halo nodes has unique
2672  // nodes/equation numbers, but there may be some duplication between
2673  // two or more different containers; the following code checks for this
2674  // and removes the duplication by replacing any node whose (first) eqn
2675  // number has already been seen with the original node that owns that eqn no.
2676 
2677  // // Storage for existing nodes, enumerated by first non-negative
2678  // // global equation number
2679  // unsigned n_dof=ndof();
2680 
2681  // Note: This used to be
2682  // Vector<Node*> global_node_pt(n_dof,0);
2683  // but this is a total killer! Memory allocation is extremely
2684  // costly and only relatively few entries are used so use
2685  // map:
2686  std::map<unsigned, Node*> global_node_pt;
2687 
2688  // Only do each retained node once
2689  std::map<Node*, bool> node_done;
2690 
2691  // Loop over existing "normal" elements in mesh
2692  unsigned n_element = mesh_pt->nelement();
2693  for (unsigned e = 0; e < n_element; e++)
2694  {
2695  FiniteElement* el_pt =
2696  dynamic_cast<FiniteElement*>(mesh_pt->element_pt(e));
2697  if (el_pt != 0)
2698  {
2699  // Loop over nodes
2700  unsigned n_node = el_pt->nnode();
2701  for (unsigned j = 0; j < n_node; j++)
2702  {
2703  Node* nod_pt = el_pt->node_pt(j);
2704 
2705  // Have we already done the node?
2706  if (!node_done[nod_pt])
2707  {
2708  node_done[nod_pt] = true;
2709 
2710  // Loop over values stored at node (if any) to find
2711  // the first non-negative eqn number
2712  unsigned first_non_negative_eqn_number_plus_one = 0;
2713  unsigned n_val = nod_pt->nvalue();
2714  for (unsigned i_val = 0; i_val < n_val; i_val++)
2715  {
2716  int eqn_no = nod_pt->eqn_number(i_val);
2717  if (eqn_no >= 0)
2718  {
2719  first_non_negative_eqn_number_plus_one = eqn_no + 1;
2720  break;
2721  }
2722  }
2723 
2724  // If we haven't found a non-negative eqn number check
2725  // eqn numbers associated with solid data (if any)
2726  if (first_non_negative_eqn_number_plus_one == 0)
2727  {
2728  // Is it a solid node?
2729  SolidNode* solid_nod_pt = dynamic_cast<SolidNode*>(nod_pt);
2730  if (solid_nod_pt != 0)
2731  {
2732  // Loop over values stored at node (if any) to find
2733  // the first non-negative eqn number
2734  unsigned n_val = solid_nod_pt->variable_position_pt()->nvalue();
2735  for (unsigned i_val = 0; i_val < n_val; i_val++)
2736  {
2737  int eqn_no =
2738  solid_nod_pt->variable_position_pt()->eqn_number(i_val);
2739  if (eqn_no >= 0)
2740  {
2741  first_non_negative_eqn_number_plus_one = eqn_no + 1;
2742  break;
2743  }
2744  }
2745  }
2746  }
2747 
2748  // Associate node with first non negative global eqn number
2749  if (first_non_negative_eqn_number_plus_one > 0)
2750  {
2751  global_node_pt[first_non_negative_eqn_number_plus_one - 1] =
2752  nod_pt;
2753  }
2754 
2755 
2756  // Take into account master nodes too
2757  if (dynamic_cast<RefineableElement*>(el_pt) != 0)
2758  {
2759  int n_cont_int_values = dynamic_cast<RefineableElement*>(el_pt)
2760  ->ncont_interpolated_values();
2761  for (int i_cont = -1; i_cont < n_cont_int_values; i_cont++)
2762  {
2763  if (nod_pt->is_hanging(i_cont))
2764  {
2765  HangInfo* hang_pt = nod_pt->hanging_pt(i_cont);
2766  unsigned n_master = hang_pt->nmaster();
2767  for (unsigned m = 0; m < n_master; m++)
2768  {
2769  Node* master_nod_pt = hang_pt->master_node_pt(m);
2770  if (!node_done[master_nod_pt])
2771  {
2772  node_done[master_nod_pt] = true;
2773 
2774  // Loop over values stored at node (if any) to find
2775  // the first non-negative eqn number
2776  unsigned first_non_negative_eqn_number_plus_one = 0;
2777  unsigned n_val = master_nod_pt->nvalue();
2778  for (unsigned i_val = 0; i_val < n_val; i_val++)
2779  {
2780  int eqn_no = master_nod_pt->eqn_number(i_val);
2781  if (eqn_no >= 0)
2782  {
2783  first_non_negative_eqn_number_plus_one = eqn_no + 1;
2784  break;
2785  }
2786  }
2787 
2788  // If we haven't found a non-negative eqn number check
2789  // eqn numbers associated with solid data (if any)
2790  if (first_non_negative_eqn_number_plus_one == 0)
2791  {
2792  // If this master is a SolidNode then add its extra
2793  // eqn numbers
2794  SolidNode* master_solid_nod_pt =
2795  dynamic_cast<SolidNode*>(master_nod_pt);
2796  if (master_solid_nod_pt != 0)
2797  {
2798  // Loop over values stored at node (if any) to find
2799  // the first non-negative eqn number
2800  unsigned n_val =
2801  master_solid_nod_pt->variable_position_pt()
2802  ->nvalue();
2803  for (unsigned i_val = 0; i_val < n_val; i_val++)
2804  {
2805  int eqn_no =
2806  master_solid_nod_pt->variable_position_pt()
2807  ->eqn_number(i_val);
2808  if (eqn_no >= 0)
2809  {
2810  first_non_negative_eqn_number_plus_one =
2811  eqn_no + 1;
2812  break;
2813  }
2814  }
2815  }
2816  }
2817  // Associate node with first non negative global
2818  // eqn number
2819  if (first_non_negative_eqn_number_plus_one > 0)
2820  {
2821  global_node_pt[first_non_negative_eqn_number_plus_one -
2822  1] = master_nod_pt;
2823  }
2824 
2825  } // End of not-yet-done hang node
2826  }
2827  }
2828  }
2829  }
2830  } // endif for node already done
2831  } // End of loop over nodes
2832  } // End of FiniteElement
2833 
2834  // Internal data equation numbers do not need to be added since
2835  // internal data cannot be shared between distinct elements, so
2836  // internal data on locally-stored elements can never be halo.
2837  }
2838 
2839  // Set to record duplicate nodes scheduled to be killed
2840  std::set<Node*> killed_nodes;
2841 
2842  // Now loop over the other processors from highest to lowest
2843  // (i.e. if there is a duplicate between these containers
2844  // then this will use the node on the highest numbered processor)
2845  for (int iproc = n_proc - 1; iproc >= 0; iproc--)
2846  {
2847  // Don't have external halo elements with yourself!
2848  if (iproc != my_rank)
2849  {
2850  // Loop over external halo elements with iproc
2851  // to remove the duplicates
2852  unsigned n_element = mesh_pt->nexternal_halo_element(iproc);
2853  for (unsigned e_ext = 0; e_ext < n_element; e_ext++)
2854  {
2855  FiniteElement* finite_ext_el_pt = dynamic_cast<FiniteElement*>(
2856  mesh_pt->external_halo_element_pt(iproc, e_ext));
2857  if (finite_ext_el_pt != 0)
2858  {
2859  // Loop over nodes
2860  unsigned n_node = finite_ext_el_pt->nnode();
2861  for (unsigned j = 0; j < n_node; j++)
2862  {
2863  Node* nod_pt = finite_ext_el_pt->node_pt(j);
2864 
2865  // Loop over values stored at node (if any) to find
2866  // the first non-negative eqn number
2867  unsigned first_non_negative_eqn_number_plus_one = 0;
2868  unsigned n_val = nod_pt->nvalue();
2869  for (unsigned i_val = 0; i_val < n_val; i_val++)
2870  {
2871  int eqn_no = nod_pt->eqn_number(i_val);
2872  if (eqn_no >= 0)
2873  {
2874  first_non_negative_eqn_number_plus_one = eqn_no + 1;
2875  break;
2876  }
2877  }
2878 
2879  // If we haven't found a non-negative eqn number check
2880  // eqn numbers associated with solid data (if any)
2881  if (first_non_negative_eqn_number_plus_one == 0)
2882  {
2883  // Is it a solid node?
2884  SolidNode* solid_nod_pt = dynamic_cast<SolidNode*>(nod_pt);
2885  if (solid_nod_pt != 0)
2886  {
2887  // Loop over values stored at node (if any) to find
2888  // the first non-negative eqn number
2889  unsigned n_val =
2890  solid_nod_pt->variable_position_pt()->nvalue();
2891  for (unsigned i_val = 0; i_val < n_val; i_val++)
2892  {
2893  int eqn_no =
2894  solid_nod_pt->variable_position_pt()->eqn_number(i_val);
2895  if (eqn_no >= 0)
2896  {
2897  first_non_negative_eqn_number_plus_one = eqn_no + 1;
2898  break;
2899  }
2900  }
2901  }
2902  }
2903 
2904  // Identified which node we're dealing with via first non-negative
2905  // global eqn number (if there is none, everything is pinned
2906  // and we don't give a damn...)
2907  if (first_non_negative_eqn_number_plus_one > 0)
2908  {
2909  Node* existing_node_pt =
2910  global_node_pt[first_non_negative_eqn_number_plus_one - 1];
2911 
2912  // Does this node already exist?
2913  if (existing_node_pt != 0)
2914  {
2915  // Record that we're about to cull one
2916  actually_removed_some_data = true;
2917 
2918  // It's a duplicate, so store the duplicated one for
2919  // later killing...
2920  Node* duplicated_node_pt = nod_pt;
2921  if (!node_done[duplicated_node_pt])
2922  {
2923  // Remove node from all boundaries
2924  std::set<unsigned>* boundaries_pt;
2925  duplicated_node_pt->get_boundaries_pt(boundaries_pt);
2926  if (boundaries_pt != 0)
2927  {
2928  Vector<unsigned> bound;
2929  unsigned nb = (*boundaries_pt).size();
2930  bound.reserve(nb);
2931  for (std::set<unsigned>::iterator it =
2932  (*boundaries_pt).begin();
2933  it != (*boundaries_pt).end();
2934  it++)
2935  {
2936  bound.push_back((*it));
2937  }
2938  for (unsigned i = 0; i < nb; i++)
2939  {
2940  mesh_pt->remove_boundary_node(bound[i],
2941  duplicated_node_pt);
2942  }
2943  }
2944 
2945  // Get ready to kill it
2946  killed_nodes.insert(duplicated_node_pt);
2947  unsigned i_proc = unsigned(iproc);
2948  mesh_pt->null_external_halo_node(i_proc,
2949  duplicated_node_pt);
2950  }
2951 
2952 
2953  // Note: For now we're leaving the "dangling" (no longer
2954  // accessed) masters where they are; they get cleaned
2955  // up next time we delete all the external storage
2956  // for the meshes so it's a temporary "leak" only...
2957  // At some point we should probably delete them properly too
2958 
2959 #ifdef PARANOID
2960 
2961  // Check that hang status of existing and replacement node
2962  // matches
2963  if (dynamic_cast<RefineableElement*>(finite_ext_el_pt) != 0)
2964  {
2965  int n_cont_inter_values =
2966  dynamic_cast<RefineableElement*>(finite_ext_el_pt)
2967  ->ncont_interpolated_values();
2968  for (int i_cont = -1; i_cont < n_cont_inter_values;
2969  i_cont++)
2970  {
2971  unsigned n_master_orig = 0;
2972  if (finite_ext_el_pt->node_pt(j)->is_hanging(i_cont))
2973  {
2974  n_master_orig = finite_ext_el_pt->node_pt(j)
2975  ->hanging_pt(i_cont)
2976  ->nmaster();
2977 
2978  // Temporary leak: Resolve like this:
2979  // loop over all external halo nodes and identify the
2980  // the ones that are still reached by any of the
2981  // external elements. Kill the dangling ones.
2982  }
2983  unsigned n_master_replace = 0;
2984  if (existing_node_pt->is_hanging(i_cont))
2985  {
2986  n_master_replace =
2987  existing_node_pt->hanging_pt(i_cont)->nmaster();
2988  }
2989 
2990  if (n_master_orig != n_master_replace)
2991  {
2992  std::ostringstream error_stream;
2993  error_stream
2994  << "Number of master nodes for node to be replaced, "
2995  << n_master_orig << ", doesn't match "
2996  << "those of replacement node, " << n_master_replace
2997  << " for i_cont=" << i_cont << std::endl;
2998  {
2999  error_stream
3000  << "Nodal coordinates of replacement node:";
3001  unsigned ndim = existing_node_pt->ndim();
3002  for (unsigned i = 0; i < ndim; i++)
3003  {
3004  error_stream << existing_node_pt->x(i) << " ";
3005  }
3006  error_stream << "\n";
3007  error_stream << "The coordinates of its "
3008  << n_master_replace
3009  << " master nodes are: \n";
3010  for (unsigned k = 0; k < n_master_replace; k++)
3011  {
3012  Node* master_nod_pt =
3013  existing_node_pt->hanging_pt(i_cont)
3014  ->master_node_pt(k);
3015  unsigned ndim = master_nod_pt->ndim();
3016  for (unsigned i = 0; i < ndim; i++)
3017  {
3018  error_stream << master_nod_pt->x(i) << " ";
3019  }
3020  error_stream << "\n";
3021  }
3022  }
3023 
3024  {
3025  error_stream
3026  << "Nodal coordinates of node to be replaced:";
3027  unsigned ndim = finite_ext_el_pt->node_pt(j)->ndim();
3028  for (unsigned i = 0; i < ndim; i++)
3029  {
3030  error_stream << finite_ext_el_pt->node_pt(j)->x(i)
3031  << " ";
3032  }
3033  error_stream << "\n";
3034  error_stream << "The coordinates of its "
3035  << n_master_orig
3036  << " master nodes are: \n";
3037  for (unsigned k = 0; k < n_master_orig; k++)
3038  {
3039  Node* master_nod_pt = finite_ext_el_pt->node_pt(j)
3040  ->hanging_pt(i_cont)
3041  ->master_node_pt(k);
3042  unsigned ndim = master_nod_pt->ndim();
3043  for (unsigned i = 0; i < ndim; i++)
3044  {
3045  error_stream << master_nod_pt->x(i) << " ";
3046  }
3047  error_stream << "\n";
3048  }
3049  }
3050 
3051 
3052  throw OomphLibError(error_stream.str(),
3053  OOMPH_CURRENT_FUNCTION,
3054  OOMPH_EXCEPTION_LOCATION);
3055  }
3056  }
3057  }
3058 #endif
3059  // ...and point to the existing one
3060  finite_ext_el_pt->node_pt(j) = existing_node_pt;
3061  }
3062  // If it doesn't, add it to the list of existing ones
3063  else
3064  {
3065  global_node_pt[first_non_negative_eqn_number_plus_one - 1] =
3066  nod_pt;
3067  node_done[nod_pt] = true;
3068  }
3069  }
3070 
3071 
3072  // Do the same for any master nodes of that (possibly replaced)
3073  // node
3074  if (dynamic_cast<RefineableElement*>(finite_ext_el_pt) != 0)
3075  {
3076  int n_cont_inter_values =
3077  dynamic_cast<RefineableElement*>(finite_ext_el_pt)
3078  ->ncont_interpolated_values();
3079  for (int i_cont = -1; i_cont < n_cont_inter_values; i_cont++)
3080  {
3081  if (finite_ext_el_pt->node_pt(j)->is_hanging(i_cont))
3082  {
3083  HangInfo* hang_pt =
3084  finite_ext_el_pt->node_pt(j)->hanging_pt(i_cont);
3085  unsigned n_master = hang_pt->nmaster();
3086  for (unsigned m = 0; m < n_master; m++)
3087  {
3088  Node* master_nod_pt = hang_pt->master_node_pt(m);
3089  unsigned n_val = master_nod_pt->nvalue();
3090  unsigned first_non_negative_eqn_number_plus_one = 0;
3091  for (unsigned i_val = 0; i_val < n_val; i_val++)
3092  {
3093  int eqn_no = master_nod_pt->eqn_number(i_val);
3094  if (eqn_no >= 0)
3095  {
3096  first_non_negative_eqn_number_plus_one = eqn_no + 1;
3097  break;
3098  }
3099  }
3100 
3101  // If we haven't found a non-negative eqn number check
3102  // eqn numbers associated with solid data (if any)
3103  if (first_non_negative_eqn_number_plus_one == 0)
3104  {
3105  SolidNode* solid_master_nod_pt =
3106  dynamic_cast<SolidNode*>(master_nod_pt);
3107  if (solid_master_nod_pt != 0)
3108  {
3109  // Loop over values stored at node (if any) to find
3110  // the first non-negative eqn number
3111  unsigned n_val =
3112  solid_master_nod_pt->variable_position_pt()
3113  ->nvalue();
3114  for (unsigned i_val = 0; i_val < n_val; i_val++)
3115  {
3116  int eqn_no =
3117  solid_master_nod_pt->variable_position_pt()
3118  ->eqn_number(i_val);
3119  if (eqn_no >= 0)
3120  {
3121  first_non_negative_eqn_number_plus_one =
3122  eqn_no + 1;
3123  break;
3124  }
3125  }
3126  }
3127  }
3128 
3129  // Identified which node we're dealing with via
3130  // first non-negative global eqn number (if there
3131  // is none, everything is pinned and we don't give a
3132  // damn...)
3133  if (first_non_negative_eqn_number_plus_one > 0)
3134  {
3135  Node* existing_node_pt = global_node_pt
3136  [first_non_negative_eqn_number_plus_one - 1];
3137 
3138  // Does this node already exist?
3139  if (existing_node_pt != 0)
3140  {
3141  // Record that we're about to cull one
3142  actually_removed_some_data = true;
3143 
3144  // It's a duplicate, so store the duplicated one for
3145  // later killing...
3146  Node* duplicated_node_pt = master_nod_pt;
3147 
3148  if (!node_done[duplicated_node_pt])
3149  {
3150  // Remove node from all boundaries
3151  std::set<unsigned>* boundaries_pt;
3152  duplicated_node_pt->get_boundaries_pt(
3153  boundaries_pt);
3154  if (boundaries_pt != 0)
3155  {
3156  for (std::set<unsigned>::iterator it =
3157  (*boundaries_pt).begin();
3158  it != (*boundaries_pt).end();
3159  it++)
3160  {
3161  mesh_pt->remove_boundary_node(
3162  (*it), duplicated_node_pt);
3163  }
3164  }
3165 
3166  killed_nodes.insert(duplicated_node_pt);
3167  unsigned i_proc = unsigned(iproc);
3168  mesh_pt->null_external_halo_node(
3169  i_proc, duplicated_node_pt);
3170  }
3171 
3172  // Weight of the original node
3173  double m_weight = hang_pt->master_weight(m);
3174 
3175 
3176 #ifdef PARANOID
3177  // Sanity check: setting replacement master
3178  // node for non-hanging node? Sign of really
3179  // f***ed up code.
3180  Node* tmp_nod_pt = finite_ext_el_pt->node_pt(j);
3181  if (!tmp_nod_pt->is_hanging(i_cont))
3182  {
3183  std::ostringstream error_stream;
3184  error_stream
3185  << "About to re-set master for i_cont= " << i_cont
3186  << " for external node (with proc " << iproc
3187  << " )" << tmp_nod_pt << " at ";
3188  unsigned n = tmp_nod_pt->ndim();
3189  for (unsigned jj = 0; jj < n; jj++)
3190  {
3191  error_stream << tmp_nod_pt->x(jj) << " ";
3192  }
3193  error_stream
3194  << " which is not hanging --> About to die!"
3195  << "Outputting offending element into oomph-info "
3196  << "stream. \n\n";
3197  oomph_info << "\n\n";
3198  finite_ext_el_pt->output(*(oomph_info.stream_pt()));
3199  oomph_info << "\n\n";
3200  oomph_info.stream_pt()->flush();
3201  throw OomphLibError(error_stream.str(),
3202  OOMPH_CURRENT_FUNCTION,
3203  OOMPH_EXCEPTION_LOCATION);
3204  }
3205 #endif
3206 
3207 
3208  // And re-set master
3209  finite_ext_el_pt->node_pt(j)
3210  ->hanging_pt(i_cont)
3211  ->set_master_node_pt(m, existing_node_pt, m_weight);
3212  }
3213  // If it doesn't, add it to the list of existing ones
3214  else
3215  {
3216  global_node_pt
3217  [first_non_negative_eqn_number_plus_one - 1] =
3218  master_nod_pt;
3219  node_done[master_nod_pt] = true;
3220  }
3221  }
3222  } // End of loop over master nodes
3223  } // end of hanging
3224  } // end of loop over continuously interpolated variables
3225  } // end refineable element (with potentially hanging nodes)
3226 
3227  } // end loop over nodes on external halo elements
3228 
3229  } // End of check for finite element
3230 
3231  } // end loop over external halo elements
3232  }
3233  } // end loop over processors
3234 
3235 
3236  // Now kill all the deleted nodes
3237  for (std::set<Node*>::iterator it = killed_nodes.begin();
3238  it != killed_nodes.end();
3239  it++)
3240  {
3241  delete (*it);
3242  }
3243 
3244 
3245  // oomph_info << "Number of nonzero entries in global_node_pt: "
3246  // << global_node_pt.size() << std::endl;
3247 
3248  // // Time it...
3249  // // Taken out again by MH -- clutters up output
3250  // if (Global_timings::Doc_comprehensive_timings)
3251  // {
3252  // double t_end = TimingHelpers::timer();
3253  // oomph_info
3254  // << "Total time for Problem::remove_duplicate_data: "
3255  // << t_end-t_start << std::endl;
3256  // }
3257  }
3258 
3259 
3260  //========================================================================
3261  /// Consolidate external halo node storage by removing nulled out
3262  /// pointers in external halo and haloed schemes for all meshes.
3263  //========================================================================
3264  void Problem::remove_null_pointers_from_external_halo_node_storage()
3265  {
3266  // Do we have submeshes?
3267  unsigned n_mesh_loop = 1;
3268  unsigned nmesh = nsub_mesh();
3269  if (nmesh > 0)
3270  {
3271  n_mesh_loop = nmesh;
3272  }
3273 
3274  // Storage for number of processors and current processor
3275  int n_proc = this->communicator_pt()->nproc();
3276  int my_rank = this->communicator_pt()->my_rank();
3277 
3278  // If only one processor then return
3279  if (n_proc == 1)
3280  {
3281  return;
3282  }
3283 
3284  // Loop over all (other) processors and store index of any nulled-out
3285  // external halo nodes in storage scheme.
3286 
3287  // Data to be sent to each processor
3288  Vector<int> send_n(n_proc, 0);
3289 
3290  // Storage for all values to be sent to all processors
3291  Vector<int> send_data;
3292 
3293  // Start location within send_data for data to be sent to each processor
3294  Vector<int> send_displacement(n_proc, 0);
3295 
3296  // Check missing ones
3297  for (int domain = 0; domain < n_proc; domain++)
3298  {
3299  // Set the offset for the current processor
3300  send_displacement[domain] = send_data.size();
3301 
3302  // Don't bother to do anything if the processor in the loop is the
3303  // current processor
3304  if (domain != my_rank)
3305  {
3306  // Deal with sub-meshes one-by-one if required
3307  Mesh* my_mesh_pt = 0;
3308 
3309  // Loop over submeshes
3310  for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
3311  {
3312  if (nmesh == 0)
3313  {
3314  my_mesh_pt = mesh_pt();
3315  }
3316  else
3317  {
3318  my_mesh_pt = mesh_pt(imesh);
3319  }
3320 
3321  // Make backup of external halo node pointers with this domain
3322  Vector<Node*> backup_pt(my_mesh_pt->external_halo_node_pt(domain));
3323 
3324  // How many do we have currently?
3325  unsigned nnod = backup_pt.size();
3326 
3327  // Prepare storage for updated halo nodes
3328  Vector<Node*> new_external_halo_node_pt;
3329  new_external_halo_node_pt.reserve(nnod);
3330 
3331  // Loop over external halo nodes with this domain
3332  for (unsigned j = 0; j < nnod; j++)
3333  {
3334  // Get pointer to node
3335  Node* nod_pt = backup_pt[j];
3336 
3337  // Has it been nulled out?
3338  if (nod_pt == 0)
3339  {
3340  // Save index of nulled out one
3341  send_data.push_back(j);
3342  }
3343  else
3344  {
3345  // Still alive: Copy across
3346  new_external_halo_node_pt.push_back(nod_pt);
3347  }
3348  }
3349 
3350  // Set new external halo node vector
3351  my_mesh_pt->set_external_halo_node_pt(domain,
3352  new_external_halo_node_pt);
3353 
3354  // End of data for this mesh
3355  send_data.push_back(-1);
3356 
3357  } // end of loop over meshes
3358 
3359  } // end skip own domain
3360 
3361  // Find the number of data added to the vector
3362  send_n[domain] = send_data.size() - send_displacement[domain];
3363  }
3364 
3365  // Storage for the number of data to be received from each processor
3366  Vector<int> receive_n(n_proc, 0);
3367 
3368  // Now send numbers of data to be sent between all processors
3369  MPI_Alltoall(&send_n[0],
3370  1,
3371  MPI_INT,
3372  &receive_n[0],
3373  1,
3374  MPI_INT,
3375  this->communicator_pt()->mpi_comm());
3376 
3377 
3378  // We now prepare the data to be received
3379  // by working out the displacements from the received data
3380  Vector<int> receive_displacement(n_proc, 0);
3381  int receive_data_count = 0;
3382  for (int rank = 0; rank < n_proc; ++rank)
3383  {
3384  // Displacement is number of data received so far
3385  receive_displacement[rank] = receive_data_count;
3386  receive_data_count += receive_n[rank];
3387  }
3388 
3389  // Now resize the receive buffer for all data from all processors
3390  // Make sure that it has a size of at least one
3391  if (receive_data_count == 0)
3392  {
3393  ++receive_data_count;
3394  }
3395  Vector<int> receive_data(receive_data_count);
3396 
3397  // Make sure that the send buffer has size at least one
3398  // so that we don't get a segmentation fault
3399  if (send_data.size() == 0)
3400  {
3401  send_data.resize(1);
3402  }
3403 
3404  // Now send the data between all the processors
3405  MPI_Alltoallv(&send_data[0],
3406  &send_n[0],
3407  &send_displacement[0],
3408  MPI_INT,
3409  &receive_data[0],
3410  &receive_n[0],
3411  &receive_displacement[0],
3412  MPI_INT,
3413  this->communicator_pt()->mpi_comm());
3414 
3415  // Now use the received data
3416  for (int send_rank = 0; send_rank < n_proc; send_rank++)
3417  {
3418  // Don't bother to do anything for the processor corresponding to the
3419  // current processor or if no data were received from this processor
3420  if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
3421  {
3422  // Counter for the data within the large array
3423  unsigned count = receive_displacement[send_rank];
3424 
3425  // Deal with sub-meshes one-by-one if required
3426  Mesh* my_mesh_pt = 0;
3427 
3428  // Loop over submeshes
3429  for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
3430  {
3431  if (nmesh == 0)
3432  {
3433  my_mesh_pt = mesh_pt();
3434  }
3435  else
3436  {
3437  my_mesh_pt = mesh_pt(imesh);
3438  }
3439 
3440  // Make backup of external haloed node pointers with this domain
3441  Vector<Node*> backup_pt =
3442  my_mesh_pt->external_haloed_node_pt(send_rank);
3443 
3444  // Unpack until we reach "end of data" indicator (-1) for this mesh
3445  while (true)
3446  {
3447  // Read next entry
3448  int next_one = receive_data[count++];
3449 
3450  if (next_one == -1)
3451  {
3452  break;
3453  }
3454  else
3455  {
3456  // Null out the entry
3457  backup_pt[next_one] = 0;
3458  }
3459  }
3460 
3461  // How many do we have currently?
3462  unsigned nnod = backup_pt.size();
3463 
3464  // Prepare storage for updated haloed nodes
3465  Vector<Node*> new_external_haloed_node_pt;
3466  new_external_haloed_node_pt.reserve(nnod);
3467 
3468  // Loop over external haloed nodes with this domain
3469  for (unsigned j = 0; j < nnod; j++)
3470  {
3471  // Get pointer to node
3472  Node* nod_pt = backup_pt[j];
3473 
3474  // Has it been nulled out?
3475  if (nod_pt != 0)
3476  {
3477  // Still alive: Copy across
3478  new_external_haloed_node_pt.push_back(nod_pt);
3479  }
3480  }
3481 
3482  // Set new external haloed node vector
3483  my_mesh_pt->set_external_haloed_node_pt(send_rank,
3484  new_external_haloed_node_pt);
3485  }
3486  }
3487 
3488  } // End of data is received
3489  }
3490 
3491 #endif
3492 
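As an aside (not part of problem.cc): the exchange above flattens, for every destination processor, the indices of nulled-out external halo nodes into one block per (sub-)mesh, each block terminated by -1, and ships the blocks via MPI_Alltoallv with counts and displacements. A serial toy sketch of just that packing/unpacking format, with invented indices:

#include <iostream>
#include <vector>

int main()
{
  // Invented indices of nulled-out external halo nodes for two meshes
  std::vector<std::vector<int>> nulled_per_mesh = {{1, 4}, {0}};

  // Pack: each mesh contributes its indices followed by a -1 terminator
  std::vector<int> send_data;
  for (const auto& mesh_entries : nulled_per_mesh)
  {
    for (int j : mesh_entries) send_data.push_back(j);
    send_data.push_back(-1);
  }

  // Unpack: this mirrors what the receiving processor does with receive_data
  unsigned count = 0;
  for (unsigned imesh = 0; imesh < nulled_per_mesh.size(); imesh++)
  {
    std::cout << "Mesh " << imesh << ": null out haloed-node entries";
    while (true)
    {
      int next_one = send_data[count++];
      if (next_one == -1) break;
      std::cout << " " << next_one;
    }
    std::cout << "\n";
  }
  return 0;
}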
3493 
3494  //=======================================================================
3495  /// Function that sets the values of the dofs in the object
3496  //======================================================================
3497  void Problem::set_dofs(const DoubleVector& dofs)
3498  {
3499  const unsigned long n_dof = this->ndof();
3500 #ifdef PARANOID
3501  if (n_dof != dofs.nrow())
3502  {
3503  std::ostringstream error_stream;
3504  error_stream << "Number of degrees of freedom in vector argument "
3505  << dofs.nrow() << "\n"
3506  << "does not equal number of degrees of freedom in problem "
3507  << n_dof;
3508  throw OomphLibError(
3509  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3510  }
3511 #endif
3512  for (unsigned long l = 0; l < n_dof; l++)
3513  {
3514  *Dof_pt[l] = dofs[l];
3515  }
3516  }
3517 
3518  /// Set history values of dofs
3519  void Problem::set_dofs(const unsigned& t, DoubleVector& dofs)
3520  {
3521 #ifdef PARANOID
3522  if (distributed())
3523  {
3524  throw OomphLibError("Not designed for distributed problems",
3525  OOMPH_EXCEPTION_LOCATION,
3526  OOMPH_CURRENT_FUNCTION);
3527  // might work if the dofs vector is distributed in the right way...
3528  }
3529 #endif
3530 
3531  // First deal with global data
3532  unsigned Nglobal_data = nglobal_data();
3533  for (unsigned i = 0; i < Nglobal_data; i++)
3534  {
3535  for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
3536  {
3537  // For each data get the equation number and copy out the value.
3538  int eqn_number = Global_data_pt[i]->eqn_number(j);
3539  if (eqn_number >= 0)
3540  {
3541  Global_data_pt[i]->set_value(t, j, dofs[eqn_number]);
3542  }
3543  }
3544  }
3545 
3546  // Next element internal data
3547  for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
3548  {
3549  GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
3550  for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
3551  {
3552  Data* d_pt = ele_pt->internal_data_pt(j);
3553  for (unsigned k = 0, nk = d_pt->nvalue(); k < nk; k++)
3554  {
3555  int eqn_number = d_pt->eqn_number(k);
3556  if (eqn_number >= 0)
3557  {
3558  d_pt->set_value(t, k, dofs[eqn_number]);
3559  }
3560  }
3561  }
3562  }
3563 
3564  // Now the nodes
3565  for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
3566  {
3567  Node* node_pt = mesh_pt()->node_pt(i);
3568  for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
3569  {
3570  // For each node get the equation number and copy out the value.
3571  int eqn_number = node_pt->eqn_number(j);
3572  if (eqn_number >= 0)
3573  {
3574  node_pt->set_value(t, j, dofs[eqn_number]);
3575  }
3576  }
3577  }
3578  }
3579 
3580 
3581  /// Set history values of dofs from the type of vector stored in
3582  /// problem::Dof_pt.
3583  void Problem::set_dofs(const unsigned& t, Vector<double*>& dof_pt)
3584  {
3585 #ifdef PARANOID
3586  if (distributed())
3587  {
3588  throw OomphLibError("Not implemented for distributed problems!",
3589  OOMPH_EXCEPTION_LOCATION,
3590  OOMPH_CURRENT_FUNCTION);
3591  }
3592 #endif
3593 
3594  // If we have any spine meshes I think there might be more degrees
3595  // of freedom there. I don't use them though so I'll let someone who
3596  // knows what they are doing handle it. --David Shepherd
3597 
3598  // First deal with global data
3599  unsigned Nglobal_data = nglobal_data();
3600  for (unsigned i = 0; i < Nglobal_data; i++)
3601  {
3602  for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
3603  {
3604  // For each data get the equation number and copy in the value.
3605  int eqn_number = Global_data_pt[i]->eqn_number(j);
3606  if (eqn_number >= 0)
3607  {
3608  Global_data_pt[i]->set_value(t, j, *(dof_pt[eqn_number]));
3609  }
3610  }
3611  }
3612 
3613  // Now the mesh data
3614  // nodes
3615  for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
3616  {
3617  Node* node_pt = mesh_pt()->node_pt(i);
3618  for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
3619  {
3620  // For each node get the equation number and copy in the value.
3621  int eqn_number = node_pt->eqn_number(j);
3622  if (eqn_number >= 0)
3623  {
3624  node_pt->set_value(t, j, *(dof_pt[eqn_number]));
3625  }
3626  }
3627  }
3628 
3629  // and non-nodal data inside elements
3630  for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
3631  {
3632  GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
3633  for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
3634  {
3635  Data* data_pt = ele_pt->internal_data_pt(j);
3636  // For each internal Data get the equation number and copy in the value.
3637  int eqn_number = data_pt->eqn_number(j);
3638  if (eqn_number >= 0)
3639  {
3640  data_pt->set_value(t, j, *(dof_pt[eqn_number]));
3641  }
3642  }
3643  }
3644  }
3645 
3646 
3647  //===================================================================
3648  /// Function that adds the values to the dofs
3649  //==================================================================
3650  void Problem::add_to_dofs(const double& lambda,
3651  const DoubleVector& increment_dofs)
3652  {
3653  const unsigned long n_dof = this->ndof();
3654  for (unsigned long l = 0; l < n_dof; l++)
3655  {
3656  *Dof_pt[l] += lambda * increment_dofs[l];
3657  }
3658  }
3659 
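As an aside (not part of problem.cc): get_dofs(), set_dofs() and add_to_dofs() can be combined in a driver code to apply, and if necessary undo, a damped update by hand. A hedged fragment; problem, correction and the rejection test are placeholders:

// Hypothetical driver-code fragment
DoubleVector backup;
problem.get_dofs(backup); // snapshot the current unknowns

// Trial update: u -> u + lambda * correction
// ("correction" is a placeholder DoubleVector computed elsewhere)
const double lambda = 0.5;
problem.add_to_dofs(lambda, correction);

// If the trial step is rejected (placeholder test), restore the snapshot
bool step_rejected = false;
if (step_rejected)
{
  problem.set_dofs(backup);
}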
3660 
3661  //=========================================================================
3662  /// Return the residual vector multiplied by the inverse mass matrix
3663  /// Virtual so that it can be overloaded for mpi problems
3664  //=========================================================================
3665  void Problem::get_inverse_mass_matrix_times_residuals(DoubleVector& Mres)
3666  {
3667  // This function does not make sense for assembly handlers other than the
3668  // default, so complain if we try to call it with another handler
3669 
3670 #ifdef PARANOID
3671  // If we are not the default, then complain
3672  if (this->assembly_handler_pt() != Default_assembly_handler_pt)
3673  {
3674  std::ostringstream error_stream;
3675  error_stream << "The function get_inverse_mass_matrix_times_residuals() "
3676  "can only be\n"
3677  << "used with the default assembly handler\n\n";
3678  throw OomphLibError(
3679  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3680  }
3681 #endif
3682 
3683  // Find the number of degrees of freedom in the problem
3684  const unsigned n_dof = this->ndof();
3685 
3686  // Resize the vector
3687  LinearAlgebraDistribution dist(this->communicator_pt(), n_dof, false);
3688  Mres.build(&dist, 0.0);
3689 
3690  // If we have discontinuous formulation
3691  // We can invert the mass matrix element by element
3692  if (Discontinuous_element_formulation)
3693  {
3694  // Loop over the elements and get their residuals
3695  const unsigned n_element = Problem::mesh_pt()->nelement();
3696  Vector<double> element_Mres;
3697  for (unsigned e = 0; e < n_element; e++)
3698  {
3699  // Cache the element
3700  DGElement* const elem_pt =
3701  dynamic_cast<DGElement*>(Problem::mesh_pt()->element_pt(e));
3702 
3703  // Find the elemental inverse mass matrix times residuals
3704  const unsigned n_el_dofs = elem_pt->ndof();
3705  elem_pt->get_inverse_mass_matrix_times_residuals(element_Mres);
3706 
3707  // Add contribution to global matrix
3708  for (unsigned i = 0; i < n_el_dofs; i++)
3709  {
3710  Mres[elem_pt->eqn_number(i)] = element_Mres[i];
3711  }
3712  }
3713  }
3714  // Otherwise it's continuous and we must invert the full
3715  // mass matrix via a global linear solve.
3716  else
3717  {
3718  // Now do the linear solve -- recycling Mass matrix if requested
3719  // If we already have the factorised mass matrix, then resolve
3721  {
3723  {
3724  oomph_info << "Not recomputing Mass Matrix " << std::endl;
3725  }
3726 
3727  // Get the residuals
3728  DoubleVector residuals(&dist, 0.0);
3729  this->get_residuals(residuals);
3730 
3731  // Resolve the linear system
3733  residuals, Mres);
3734  }
3735  // Otherwise solve for the first time
3736  else
3737  {
3738  // If we wish to reuse the mass matrix, then enable resolve
3740  {
3742  {
3743  oomph_info << "Enabling resolve in explicit timestep" << std::endl;
3744  }
3746  ->enable_resolve();
3747  }
3748 
3749  // Use a custom assembly handler to assemble and invert the mass matrix
3750 
3751  // Store the old assembly handler
3752  AssemblyHandler* old_assembly_handler_pt = this->assembly_handler_pt();
3753  // Set the assembly handler to the explicit timestep handler
3755 
3756  // Solve the linear system
3758  Mres);
3759  // The mass matrix has now been computed
3761 
3762  // Delete the Explicit Timestep handler
3763  delete this->assembly_handler_pt();
3764  // Reset the assembly handler to the original handler
3765  this->assembly_handler_pt() = old_assembly_handler_pt;
3766  }
3767  }
3768  }
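
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): for a continuous (non-DG)
  // discretisation the branch above performs a global mass-matrix solve,
  // so recycling the factorised mass matrix between explicit timesteps
  // avoids repeating it from scratch. The enable/disable calls are
  // assumed to be the usual Problem accessors for the mass-matrix-reuse
  // flag.
  // ---------------------------------------------------------------------
  inline void example_recycle_mass_matrix(Problem& problem)
  {
    // Keep the factorised mass matrix between explicit timesteps ...
    problem.enable_mass_matrix_reuse();

    // ... (take the explicit timesteps here) ...

    // ... and force recomputation when the mesh or the equation
    // numbering changes.
    problem.disable_mass_matrix_reuse();
  }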
3769 
3770  void Problem::get_dvaluesdt(DoubleVector& f)
3771  {
3772  // Loop over timesteppers: make them (temporarily) steady and store their
3773  // is_steady status.
3774  unsigned n_time_steppers = this->ntime_stepper();
3775  std::vector<bool> was_steady(n_time_steppers);
3776  for (unsigned i = 0; i < n_time_steppers; i++)
3777  {
3778  was_steady[i] = time_stepper_pt(i)->is_steady();
3780  }
3781 
3782  // Calculate f using the residual/jacobian machinery.
3784 
3785  // Reset the is_steady status of all timesteppers that weren't already
3786  // steady when we came in here and reset their weights
3787  for (unsigned i = 0; i < n_time_steppers; i++)
3788  {
3789  if (!was_steady[i])
3790  {
3792  }
3793  }
3794  }
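
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): with the timesteppers
  // temporarily made steady, the function above evaluates the "f" of the
  // semi-discretised system du/dt = f(u), which is what an explicit
  // scheme advances. A bare forward-Euler step might look like this
  // (assuming the usual dof(), ndof() and time() accessors; the sign
  // convention follows the element residuals).
  // ---------------------------------------------------------------------
  inline void example_forward_euler_step(Problem& problem, const double& dt)
  {
    DoubleVector dudt;
    problem.get_dvaluesdt(dudt);

    const unsigned n_dof = problem.ndof();
    for (unsigned i = 0; i < n_dof; i++)
    {
      problem.dof(i) += dt * dudt[i];
    }
    problem.time() += dt;
  }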
3795 
3796 
3797  //================================================================
3798  /// Get the total residuals Vector for the problem
3799  //================================================================
3800  void Problem::get_residuals(DoubleVector& residuals)
3801  {
3802  // Three different cases; if MPI_Helpers::MPI_has_been_initialised=true
3803  // this means MPI_Helpers::init() has been called. This could happen on a
3804  // code compiled with MPI but run serially; in this instance the
3805  // get_residuals function still works on one processor.
3806  //
3807  // Secondly, if a code has been compiled with MPI, but MPI_Helpers::init()
3808  // has not been called, then MPI_Helpers::MPI_has_been_initialised=false
3809  // and the code calls...
3810  //
3811  // Thirdly, the serial version (compiled by all, but only run when compiled
3812  // with MPI if MPI_Helpers::MPI_has_been_initialised=false
3813 
3814  // Check that the residuals has the correct number of rows if it has been
3815  // setup
3816 #ifdef PARANOID
3817  if (residuals.built())
3818  {
3819  if (residuals.distribution_pt()->nrow() != this->ndof())
3820  {
3821  std::ostringstream error_stream;
3822  error_stream << "The distribution of the residuals vector does not "
3823  "have the correct\n"
3824  << "number of global rows\n";
3825 
3826  throw OomphLibError(
3827  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3828  }
3829  }
3830 #endif
3831 
3832  // Determine the distribution for the residuals vector
3833  // IF the vector has distribution setup then use that
3834  // ELSE determine the distribution based on the
3835  // distributed_matrix_distribution enum
3836  LinearAlgebraDistribution* dist_pt = 0;
3837  if (residuals.built())
3838  {
3839  dist_pt = new LinearAlgebraDistribution(residuals.distribution_pt());
3840  }
3841  else
3842  {
3844  }
3845 
3846  // Locally cache pointer to assembly handler
3847  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
3848
3849  // Build and zero the residuals
3850  residuals.build(dist_pt, 0.0);
3851 
3852  // Serial (or one processor case)
3853 #ifdef OOMPH_HAS_MPI
3854  if (this->communicator_pt()->nproc() == 1)
3855  {
3856 #endif // OOMPH_HAS_MPI
3857  // Loop over all the elements
3858  unsigned long Element_pt_range = Mesh_pt->nelement();
3859  for (unsigned long e = 0; e < Element_pt_range; e++)
3860  {
3861  // Get the pointer to the element
3862  GeneralisedElement* elem_pt = Mesh_pt->element_pt(e);
3863  // Find number of dofs in the element
3864  unsigned n_element_dofs = assembly_handler_pt->ndof(elem_pt);
3865  // Set up an array
3866  Vector<double> element_residuals(n_element_dofs);
3867  // Fill the array
3868  assembly_handler_pt->get_residuals(elem_pt, element_residuals);
3869  // Now loop over the dofs and assign values to global Vector
3870  for (unsigned l = 0; l < n_element_dofs; l++)
3871  {
3872  residuals[assembly_handler_pt->eqn_number(elem_pt, l)] +=
3873  element_residuals[l];
3874  }
3875  }
3876  // Otherwise parallel case
3877 #ifdef OOMPH_HAS_MPI
3878  }
3879  else
3880  {
3881  // Store the current assembly handler
3882  AssemblyHandler* const old_assembly_handler_pt = Assembly_handler_pt;
3883  // Create a new assembly handler that only assembles the residuals
3885  new ParallelResidualsHandler(old_assembly_handler_pt);
3886 
3887  // Setup memory for parallel sparse assemble
3888  // No matrix so all size zero
3889  Vector<int*> column_index;
3890  Vector<int*> row_start;
3891  Vector<double*> value;
3892  Vector<unsigned> nnz;
3893  // One set of residuals of size one
3894  Vector<double*> res(1);
3895 
3896  // Call the parallel sparse assemble, that should only assemble residuals
3898  dist_pt, column_index, row_start, value, nnz, res);
3899  // Fill in the residuals data
3900  residuals.set_external_values(res[0], true);
3901 
3902  // Delete new assembly handler
3903  delete Assembly_handler_pt;
3904  // Reset the assembly handler to the original
3905  Assembly_handler_pt = old_assembly_handler_pt;
3906  }
3907 #endif
3908 
3909  // Delete the distribution
3910  delete dist_pt;
3911  }
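
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): a typical caller-side use
  // of get_residuals() is a cheap convergence/consistency check on the
  // assembled residual vector (norm() is assumed to return the global
  // 2-norm of a DoubleVector).
  // ---------------------------------------------------------------------
  inline double example_residual_norm(Problem& problem)
  {
    DoubleVector residuals;
    problem.get_residuals(residuals);
    return residuals.norm();
  }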
3912 
3913  //=============================================================================
3914  /// Get the fully assembled residual vector and Jacobian matrix
3915  /// in dense storage. The DoubleVector residuals returned will be
3916  /// non-distributed. If on calling this method the DoubleVector residuals is
3917  /// setup then it must be non-distributed and of the correct length.
3918  /// The matrix type DenseDoubleMatrix is not distributable and therefore
3919  /// the residual vector is also assumed to be non distributable.
3920  //=============================================================================
3921  void Problem::get_jacobian(DoubleVector& residuals,
3922  DenseDoubleMatrix& jacobian)
3923  {
3924  // get the number of degrees of freedom
3925  unsigned n_dof = ndof();
3926 
3927 #ifdef PARANOID
3928  // PARANOID checks: if the distribution of residuals is setup then it
3929  // must not be distributed, have the right number of rows, and the same
3930  // communicator as the problem
3931  if (residuals.built())
3932  {
3933  if (residuals.distribution_pt()->distributed())
3934  {
3935  std::ostringstream error_stream;
3936  error_stream
3937  << "If the DoubleVector residuals is setup then it must not "
3938  << "be distributed.";
3939  throw OomphLibError(
3940  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3941  }
3942  if (residuals.distribution_pt()->nrow() != n_dof)
3943  {
3944  std::ostringstream error_stream;
3945  error_stream
3946  << "If the DoubleVector residuals is setup then it must have"
3947  << " the correct number of rows";
3948  throw OomphLibError(
3949  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3950  }
3951  if (!(*Communicator_pt ==
3952  *residuals.distribution_pt()->communicator_pt()))
3953  {
3954  std::ostringstream error_stream;
3955  error_stream
3956  << "If the DoubleVector residuals is setup then it must have"
3957  << " the same communicator as the problem.";
3958  throw OomphLibError(
3959  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
3960  }
3961  }
3962 #endif
3963 
3964  // set the residuals distribution if it is not setup
3965  if (!residuals.built())
3966  {
3967  LinearAlgebraDistribution dist(Communicator_pt, n_dof, false);
3968  residuals.build(&dist, 0.0);
3969  }
3970  // else just zero the residuals
3971  else
3972  {
3973  residuals.initialise(0.0);
3974  }
3975 
3976  // Resize the matrices -- this cannot always be done externally
3977  // because get_jacobian exists in many different versions for
3978  // different storage formats -- resizing a CC or CR matrix doesn't
3979  // make sense.
3980 
3981  // resize the jacobian
3982  jacobian.resize(n_dof, n_dof);
3983  jacobian.initialise(0.0);
3984 
3985  // Locally cache pointer to assembly handler
3986  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
3987
3988  // Loop over all the elements
3989  unsigned long n_element = Mesh_pt->nelement();
3990  for (unsigned long e = 0; e < n_element; e++)
3991  {
3992  // Get the pointer to the element
3993  GeneralisedElement* elem_pt = Mesh_pt->element_pt(e);
3994  // Find number of dofs in the element
3995  unsigned n_element_dofs = assembly_handler_pt->ndof(elem_pt);
3996  // Set up an array
3997  Vector<double> element_residuals(n_element_dofs);
3998  // Set up a matrix
3999  DenseMatrix<double> element_jacobian(n_element_dofs);
4000  // Fill the array
4002  elem_pt, element_residuals, element_jacobian);
4003  // Now loop over the dofs and assign values to global Vector
4004  for (unsigned l = 0; l < n_element_dofs; l++)
4005  {
4006  unsigned long eqn_number = assembly_handler_pt->eqn_number(elem_pt, l);
4007  residuals[eqn_number] += element_residuals[l];
4008  for (unsigned l2 = 0; l2 < n_element_dofs; l2++)
4009  {
4010  jacobian(eqn_number, assembly_handler_pt->eqn_number(elem_pt, l2)) +=
4011  element_jacobian(l, l2);
4012  }
4013  }
4014  }
4015  }
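
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): dense assembly is mainly
  // useful for small problems or debugging; here the full Jacobian is
  // assembled and a few basic facts about it are reported.
  // ---------------------------------------------------------------------
  inline void example_inspect_dense_jacobian(Problem& problem)
  {
    DoubleVector residuals;
    DenseDoubleMatrix jacobian;
    problem.get_jacobian(residuals, jacobian);

    if (problem.ndof() > 0)
    {
      oomph_info << "Assembled " << jacobian.nrow() << " x "
                 << jacobian.ncol() << " dense Jacobian; J(0,0) = "
                 << jacobian(0, 0) << std::endl;
    }
  }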
4016 
4017  //=============================================================================
4018  /// Return the fully-assembled Jacobian and residuals for the problem,
4019  /// in the case where the Jacobian matrix is in a distributable
4020  /// row compressed storage format.
4021  /// 1. If the distribution of the jacobian and residuals is setup then, they
4022  /// will be returned with that distribution.
4023  /// Note. the jacobian and residuals must have the same distribution.
4024  /// 2. If the distribution of the jacobian and residuals are not setup then
4025  /// their distribution will computed based on:
4026  /// Distributed_problem_matrix_distribution.
4027  //=============================================================================
4028  void Problem::get_jacobian(DoubleVector& residuals, CRDoubleMatrix& jacobian)
4029  {
4030  // Three different cases; if MPI_Helpers::MPI_has_been_initialised=true
4031  // this means MPI_Helpers::setup() has been called. This could happen on a
4032  // code compiled with MPI but run serially; in this instance the
4033  // get_residuals function still works on one processor.
4034  //
4035  // Secondly, if a code has been compiled with MPI, but MPI_Helpers::setup()
4036  // has not been called, then MPI_Helpers::MPI_has_been_initialised=false
4037  // and the code calls...
4038  //
4039  // Thirdly, the serial version (compiled by all, but only run when compiled
4040  // with MPI if MPI_Helpers::MPI_has_been_initialised=false
4041  //
4042  // The only case where an MPI code cannot run serially at present
4043  // is one where the distribute function is used (i.e. METIS is called)
4044 
4045  // Allocate storage for the matrix entries
4046  // The generalised Vector<Vector<>> structure is required
4047  // for the most general interface to sparse_assemble() which allows
4048  // the assembly of multiple matrices at once.
4049  Vector<int*> column_index(1);
4050  Vector<int*> row_start(1);
4051  Vector<double*> value(1);
4052  Vector<unsigned> nnz(1);
4053 
4054 #ifdef PARANOID
4055  // PARANOID checks that the distribution of the jacobian matches that of the
4056  // residuals (if they are setup) and that they have the right number of rows
4057  if (residuals.built() && jacobian.distribution_built())
4058  {
4059  if (!(*residuals.distribution_pt() == *jacobian.distribution_pt()))
4060  {
4061  std::ostringstream error_stream;
4062  error_stream << "The distribution of the residuals must "
4063  << "be the same as the distribution of the jacobian.";
4064  throw OomphLibError(
4065  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4066  }
4067  if (jacobian.distribution_pt()->nrow() != this->ndof())
4068  {
4069  std::ostringstream error_stream;
4070  error_stream
4071  << "The distribution of the jacobian and residuals does not"
4072  << "have the correct number of global rows.";
4073  throw OomphLibError(
4074  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4075  }
4076  }
4077  else if (residuals.built() != jacobian.distribution_built())
4078  {
4079  std::ostringstream error_stream;
4080  error_stream << "The distribution of the jacobian and residuals must "
4081  << "both be setup or both not setup";
4082  throw OomphLibError(
4083  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4084  }
4085 #endif
4086 
4087 
4088  // Allocate generalised storage format for passing to sparse_assemble()
4089  Vector<double*> res(1);
4090 
4091  // determine the distribution for the jacobian.
4092  // IF the jacobian has distribution setup then use that
4093  // ELSE determine the distribution based on the
4094  // distributed_matrix_distribution enum
4095  LinearAlgebraDistribution* dist_pt = 0;
4096  if (jacobian.distribution_built())
4097  {
4098  dist_pt = new LinearAlgebraDistribution(jacobian.distribution_pt());
4099  }
4100  else
4101  {
4103  }
4104 
4105 
4106  // The matrix is in compressed row format
4107  bool compressed_row_flag = true;
4108 
4109 #ifdef OOMPH_HAS_MPI
4110  //
4111  if (Communicator_pt->nproc() == 1)
4112  {
4113 #endif
4115  column_index, row_start, value, nnz, res, compressed_row_flag);
4116  jacobian.build(dist_pt);
4117  jacobian.build_without_copy(
4118  dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
4119  residuals.build(dist_pt, 0.0);
4120  residuals.set_external_values(res[0], true);
4121 #ifdef OOMPH_HAS_MPI
4122  }
4123  else
4124  {
4125  if (dist_pt->distributed())
4126  {
4128  dist_pt, column_index, row_start, value, nnz, res);
4129  jacobian.build(dist_pt);
4130  jacobian.build_without_copy(
4131  dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
4132  residuals.build(dist_pt, 0.0);
4133  residuals.set_external_values(res[0], true);
4134  }
4135  else
4136  {
4137  LinearAlgebraDistribution* temp_dist_pt =
4138  new LinearAlgebraDistribution(Communicator_pt, dist_pt->nrow(), true);
4140  temp_dist_pt, column_index, row_start, value, nnz, res);
4141  jacobian.build(temp_dist_pt);
4142  jacobian.build_without_copy(
4143  dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
4144  jacobian.redistribute(dist_pt);
4145  residuals.build(temp_dist_pt, 0.0);
4146  residuals.set_external_values(res[0], true);
4147  residuals.redistribute(dist_pt);
4148  delete temp_dist_pt;
4149  }
4150  }
4151 #endif
4152 
4153  // clean up dist_pt and residuals_vector pt
4154  delete dist_pt;
4155  }
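
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): compressed-row storage is
  // the format used by the default direct solver, and a caller can
  // assemble it directly, e.g. to report the sparsity of the Jacobian.
  // ---------------------------------------------------------------------
  inline void example_report_jacobian_sparsity(Problem& problem)
  {
    DoubleVector residuals;
    CRDoubleMatrix jacobian;
    problem.get_jacobian(residuals, jacobian);

    oomph_info << "Jacobian has " << jacobian.nrow() << " rows and "
               << jacobian.nnz() << " stored non-zero entries" << std::endl;
  }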
4156 
4157  //=============================================================================
4158  /// Return the fully-assembled Jacobian and residuals for the problem,
4159  /// in the case when the jacobian matrix is in column-compressed storage
4160  /// format.
4161  //=============================================================================
4162  void Problem::get_jacobian(DoubleVector& residuals, CCDoubleMatrix& jacobian)
4163  {
4164  // Three different cases; if MPI_Helpers::MPI_has_been_initialised=true
4165  // this means MPI_Helpers::setup() has been called. This could happen on a
4166  // code compiled with MPI but run serially; in this instance the
4167  // get_residuals function still works on one processor.
4168  //
4169  // Secondly, if a code has been compiled with MPI, but MPI_Helpers::setup()
4170  // has not been called, then MPI_Helpers::MPI_has_been_initialised=false
4171  // and the code calls...
4172  //
4173  // Thirdly, the serial version (compiled by all, but only run when compiled
4174  // with MPI if MPI_Helpers::MPI_has_been_initialised=false
4175  //
4176  // The only case where an MPI code cannot run serially at present
4177  // is one where the distribute function is used (i.e. METIS is called)
4178 
4179  // get the number of degrees of freedom
4180  unsigned n_dof = ndof();
4181 
4182 #ifdef PARANOID
4183  // PARANOID checks: if the distribution of residuals is setup then it
4184  // must not be distributed, have the right number of rows, and the same
4185  // communicator as the problem
4186  if (residuals.built())
4187  {
4188  if (residuals.distribution_pt()->distributed())
4189  {
4190  std::ostringstream error_stream;
4191  error_stream
4192  << "If the DoubleVector residuals is setup then it must not "
4193  << "be distributed.";
4194  throw OomphLibError(
4195  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4196  }
4197  if (residuals.distribution_pt()->nrow() != n_dof)
4198  {
4199  std::ostringstream error_stream;
4200  error_stream
4201  << "If the DoubleVector residuals is setup then it must have"
4202  << " the correct number of rows";
4203  throw OomphLibError(
4204  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4205  }
4206  if (!(*Communicator_pt ==
4207  *residuals.distribution_pt()->communicator_pt()))
4208  {
4209  std::ostringstream error_stream;
4210  error_stream
4211  << "If the DoubleVector residuals is setup then it must have"
4212  << " the same communicator as the problem.";
4213  throw OomphLibError(
4214  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4215  }
4216  }
4217 #endif
4218 
4219  // Allocate storage for the matrix entries
4220  // The generalised Vector<Vector<>> structure is required
4221  // for the most general interface to sparse_assemble() which allows
4222  // the assembly of multiple matrices at once.
4223  Vector<int*> row_index(1);
4224  Vector<int*> column_start(1);
4225  Vector<double*> value(1);
4226 
4227  // Allocate generalised storage format for passing to sparse_assemble()
4228  Vector<double*> res(1);
4229 
4230  // allocate storage for the number of non-zeros in each matrix
4231  Vector<unsigned> nnz(1);
4232 
4233  // The matrix is in compressed column format
4234  bool compressed_row_flag = false;
4235 
4236  // get the distribution for the residuals
4237  LinearAlgebraDistribution* dist_pt;
4238  if (!residuals.built())
4239  {
4240  dist_pt =
4241  new LinearAlgebraDistribution(Communicator_pt, this->ndof(), false);
4242  }
4243  else
4244  {
4245  dist_pt = new LinearAlgebraDistribution(residuals.distribution_pt());
4246  }
4247 
4248 #ifdef OOMPH_HAS_MPI
4249  if (communicator_pt()->nproc() == 1)
4250  {
4251 #endif
4253  row_index, column_start, value, nnz, res, compressed_row_flag);
4254  jacobian.build_without_copy(
4255  value[0], row_index[0], column_start[0], nnz[0], n_dof, n_dof);
4256  residuals.build(dist_pt, 0.0);
4257  residuals.set_external_values(res[0], true);
4258 #ifdef OOMPH_HAS_MPI
4259  }
4260  else
4261  {
4262  std::ostringstream error_stream;
4263  error_stream << "Cannot assemble a CCDoubleMatrix Jacobian on more "
4264  << "than one processor.";
4265  throw OomphLibError(
4266  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4267  }
4268 #endif
4269 
4270  // clean up
4271  delete dist_pt;
4272  }
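
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): the compressed-column
  // variant above deliberately refuses to run on more than one processor,
  // so a caller-side guard along these lines is a sensible precaution.
  // ---------------------------------------------------------------------
  inline void example_serial_cc_jacobian(Problem& problem)
  {
    if (problem.communicator_pt()->nproc() == 1)
    {
      DoubleVector residuals;
      CCDoubleMatrix jacobian;
      problem.get_jacobian(residuals, jacobian);
    }
  }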
4273 
4274 
4275  //===================================================================
4276  /// Set all pinned values to zero.
4277  /// Used to set boundary conditions to be homogeneous in the copy
4278  /// of the problem used in adaptive bifurcation tracking
4279  /// (ALH: TEMPORARY HACK, WILL BE FIXED)
4280  //==================================================================
4281  void Problem::set_pinned_values_to_zero()
4282  {
4283  // NOTE THIS DOES NOT ZERO ANY SPINE DATA, but otherwise everything else
4284  // should be zeroed
4285 
4286  // Zero any pinned global Data
4287  const unsigned n_global_data = nglobal_data();
4288  for (unsigned i = 0; i < n_global_data; i++)
4289  {
4290  Data* const local_data_pt = Global_data_pt[i];
4291  const unsigned n_value = local_data_pt->nvalue();
4292  for (unsigned j = 0; j < n_value; j++)
4293  {
4294  // If the data value is pinned set the value to zero
4295  if (local_data_pt->is_pinned(j))
4296  {
4297  local_data_pt->set_value(j, 0.0);
4298  }
4299  }
4300  }
4301 
4302  // Loop over the submeshes:
4303  const unsigned n_sub_mesh = Sub_mesh_pt.size();
4304  if (n_sub_mesh == 0)
4305  {
4306  // Loop over the nodes in the mesh
4307  const unsigned n_node = Mesh_pt->nnode();
4308  for (unsigned n = 0; n < n_node; n++)
4309  {
4310  Node* const local_node_pt = Mesh_pt->node_pt(n);
4311  const unsigned n_value = local_node_pt->nvalue();
4312  for (unsigned j = 0; j < n_value; j++)
4313  {
4314  // If the data value is pinned set the value to zero
4315  if (local_node_pt->is_pinned(j))
4316  {
4317  local_node_pt->set_value(j, 0.0);
4318  }
4319  }
4320 
4321  // Try to cast to a solid node
4322  SolidNode* const local_solid_node_pt =
4323  dynamic_cast<SolidNode*>(local_node_pt);
4324  // If we are successful
4325  if (local_solid_node_pt)
4326  {
4327  // Find the dimension of the node
4328  const unsigned n_dim = local_solid_node_pt->ndim();
4329  // Find number of positions
4330  const unsigned n_position_type =
4331  local_solid_node_pt->nposition_type();
4332 
4333  for (unsigned k = 0; k < n_position_type; k++)
4334  {
4335  for (unsigned i = 0; i < n_dim; i++)
4336  {
4337  // If the generalised position is pinned,
4338  // set the value to zero
4339  if (local_solid_node_pt->position_is_pinned(k, i))
4340  {
4341  local_solid_node_pt->x_gen(k, i) = 0.0;
4342  }
4343  }
4344  }
4345  }
4346  }
4347 
4348  // Now loop over the elements and zero their internal data
4349  const unsigned n_element = Mesh_pt->nelement();
4350  for (unsigned e = 0; e < n_element; e++)
4351  {
4352  GeneralisedElement* const local_element_pt = Mesh_pt->element_pt(e);
4353  const unsigned n_internal = local_element_pt->ninternal_data();
4354  for (unsigned i = 0; i < n_internal; i++)
4355  {
4356  Data* const local_data_pt = local_element_pt->internal_data_pt(i);
4357  const unsigned n_value = local_data_pt->nvalue();
4358  for (unsigned j = 0; j < n_value; j++)
4359  {
4360  // If the data value is pinned set the value to zero
4361  if (local_data_pt->is_pinned(j))
4362  {
4363  local_data_pt->set_value(j, 0.0);
4364  }
4365  }
4366  }
4367  } // End of loop over elements
4368  }
4369  else
4370  {
4371  // Alternatively loop over all sub meshes
4372  for (unsigned m = 0; m < n_sub_mesh; m++)
4373  {
4374  // Loop over the nodes in the submesh
4375  const unsigned n_node = Sub_mesh_pt[m]->nnode();
4376  for (unsigned n = 0; n < n_node; n++)
4377  {
4378  Node* const local_node_pt = Sub_mesh_pt[m]->node_pt(n);
4379  const unsigned n_value = local_node_pt->nvalue();
4380  for (unsigned j = 0; j < n_value; j++)
4381  {
4382  // If the data value is pinned set the value to zero
4383  if (local_node_pt->is_pinned(j))
4384  {
4385  local_node_pt->set_value(j, 0.0);
4386  }
4387  }
4388 
4389  // Try to cast to a solid node
4390  SolidNode* const local_solid_node_pt =
4391  dynamic_cast<SolidNode*>(local_node_pt);
4392  // If we are successful
4393  if (local_solid_node_pt)
4394  {
4395  // Find the dimension of the node
4396  const unsigned n_dim = local_solid_node_pt->ndim();
4397  // Find number of positions
4398  const unsigned n_position_type =
4399  local_solid_node_pt->nposition_type();
4400 
4401  for (unsigned k = 0; k < n_position_type; k++)
4402  {
4403  for (unsigned i = 0; i < n_dim; i++)
4404  {
4405  // If the generalised position is pinned,
4406  // set the value to zero
4407  if (local_solid_node_pt->position_is_pinned(k, i))
4408  {
4409  local_solid_node_pt->x_gen(k, i) = 0.0;
4410  }
4411  }
4412  }
4413  }
4414  }
4415 
4416  // Now loop over the elements and zero their internal data
4417  const unsigned n_element = Sub_mesh_pt[m]->nelement();
4418  for (unsigned e = 0; e < n_element; e++)
4419  {
4420  GeneralisedElement* const local_element_pt =
4421  Sub_mesh_pt[m]->element_pt(e);
4422  const unsigned n_internal = local_element_pt->ninternal_data();
4423  for (unsigned i = 0; i < n_internal; i++)
4424  {
4425  Data* const local_data_pt = local_element_pt->internal_data_pt(i);
4426  const unsigned n_value = local_data_pt->nvalue();
4427  for (unsigned j = 0; j < n_value; j++)
4428  {
4429  // If the data value is pinned set the value to zero
4430  if (local_data_pt->is_pinned(j))
4431  {
4432  local_data_pt->set_value(j, 0.0);
4433  }
4434  }
4435  }
4436  } // End of loop over elements
4437  }
4438  }
4439  }
4440 
4441 
4442  //=====================================================================
4443  /// This is a (private) helper function that is used to assemble system
4444  /// matrices in compressed row or column format
4445  /// and compute residual vectors.
4446  /// The default action is to assemble the jacobian matrix and
4447  /// residuals for the Newton method. The action can be
4448  /// overloaded at an elemental level by changing the default
4449  /// behaviour of the function Element::get_all_vectors_and_matrices().
4450  /// column_or_row_index: Column [or row] index of given entry
4451  /// row_or_column_start: Index of first entry for given row [or column]
4452  /// value : Vector of nonzero entries
4453  /// residuals : Residual vector
4454  /// compressed_row_flag: Bool flag to indicate if storage format is
4455  /// compressed row [if false interpretation of
4456  /// arguments is as stated in square brackets].
4457  /// We provide several different assembly methods, each with different
4458  /// memory requirements/execution speeds. The method is set by
4459  /// the public flag Problem::Sparse_assembly_method.
4460  //=====================================================================
4461  void Problem::sparse_assemble_row_or_column_compressed(
4462  Vector<int*>& column_or_row_index,
4463  Vector<int*>& row_or_column_start,
4464  Vector<double*>& value,
4465  Vector<unsigned>& nnz,
4466  Vector<double*>& residuals,
4467  bool compressed_row_flag)
4468  {
4469  // Choose the actual method
4470  switch (Sparse_assembly_method)
4471  {
4473 
4475  column_or_row_index,
4476  row_or_column_start,
4477  value,
4478  nnz,
4479  residuals,
4480  compressed_row_flag);
4481 
4482  break;
4483 
4485 
4487  column_or_row_index,
4488  row_or_column_start,
4489  value,
4490  nnz,
4491  residuals,
4492  compressed_row_flag);
4493 
4494  break;
4495 
4497 
4499  row_or_column_start,
4500  value,
4501  nnz,
4502  residuals,
4503  compressed_row_flag);
4504 
4505  break;
4506 
4508 
4510  column_or_row_index,
4511  row_or_column_start,
4512  value,
4513  nnz,
4514  residuals,
4515  compressed_row_flag);
4516 
4517  break;
4518 
4520 
4522  column_or_row_index,
4523  row_or_column_start,
4524  value,
4525  nnz,
4526  residuals,
4527  compressed_row_flag);
4528 
4529  break;
4530 
4531  default:
4532 
4533  std::ostringstream error_stream;
4534  error_stream
4535  << "Error: Incorrect value for Problem::Sparse_assembly_method"
4536  << Sparse_assembly_method << std::endl
4537  << "It should be one of the enumeration Problem::Assembly_method"
4538  << std::endl;
4539  throw OomphLibError(
4540  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4541  }
4542  }
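
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): the assembly strategy used
  // by the dispatcher above is chosen through the public flag
  // Problem::Sparse_assembly_method. The enumerator shown is assumed to
  // be one of the Assembly_method values declared in problem.h; consult
  // the header for the full list.
  // ---------------------------------------------------------------------
  inline void example_select_assembly_method(Problem& problem)
  {
    problem.Sparse_assembly_method =
      Problem::Perform_assembly_using_vectors_of_pairs;
  }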
4543 
4544 
4545  //=====================================================================
4546  /// This is a (private) helper function that is used to assemble system
4547  /// matrices in compressed row or column format
4548  /// and compute residual vectors, using maps
4549  /// The default action is to assemble the jacobian matrix and
4550  /// residuals for the Newton method. The action can be
4551  /// overloaded at an elemental level by changing the default
4552  /// behaviour of the function Element::get_all_vectors_and_matrices().
4553  /// column_or_row_index: Column [or row] index of given entry
4554  /// row_or_column_start: Index of first entry for given row [or column]
4555  /// value : Vector of nonzero entries
4556  /// residuals : Residual vector
4557  /// compressed_row_flag: Bool flag to indicate if storage format is
4558  /// compressed row [if false interpretation of
4559  /// arguments is as stated in square brackets].
4560  //=====================================================================
4561  void Problem::sparse_assemble_row_or_column_compressed_with_maps(
4562  Vector<int*>& column_or_row_index,
4563  Vector<int*>& row_or_column_start,
4564  Vector<double*>& value,
4565  Vector<unsigned>& nnz,
4566  Vector<double*>& residuals,
4567  bool compressed_row_flag)
4568  {
4569  // Total number of elements
4570  const unsigned long n_elements = mesh_pt()->nelement();
4571 
4572  // Default range of elements for distributed problems
4573  unsigned long el_lo = 0;
4574  unsigned long el_hi = n_elements - 1;
4575 
4576 #ifdef OOMPH_HAS_MPI
4577  // Otherwise just loop over a fraction of the elements
4578  // (This will either have been initialised in
4579  // Problem::set_default_first_and_last_element_for_assembly() or
4580  // will have been re-assigned during a previous assembly loop
4581  // Note that following the re-assignment only the entries
4582  // for the current processor are relevant.
4584  {
4585  el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
4586  el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
4587  }
4588 #endif
4589 
4590  // number of dofs
4591  unsigned ndof = this->ndof();
4592 
4593  // Find the number of vectors to be assembled
4594  const unsigned n_vector = residuals.size();
4595 
4596  // Find the number of matrices to be assembled
4597  const unsigned n_matrix = column_or_row_index.size();
4598 
4599  // Locally cache pointer to assembly handler
4600  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
4601
4602 #ifdef OOMPH_HAS_MPI
4603  bool doing_residuals = false;
4604  if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
4605  {
4606  doing_residuals = true;
4607  }
4608 #endif
4609 
4610 // Error check dimensions
4611 #ifdef PARANOID
4612  if (row_or_column_start.size() != n_matrix)
4613  {
4614  std::ostringstream error_stream;
4615  error_stream << "Error: " << std::endl
4616  << "row_or_column_start.size() "
4617  << row_or_column_start.size() << " does not equal "
4618  << "column_or_row_index.size() "
4619  << column_or_row_index.size() << std::endl;
4620  throw OomphLibError(
4621  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4622  }
4623 
4624  if (value.size() != n_matrix)
4625  {
4626  std::ostringstream error_stream;
4627  error_stream
4628  << "Error in Problem::sparse_assemble_row_or_column_compressed "
4629  << std::endl
4630  << "value.size() " << value.size() << " does not equal "
4631  << "column_or_row_index.size() " << column_or_row_index.size()
4632  << std::endl
4633  << std::endl
4634  << std::endl;
4635  throw OomphLibError(
4636  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4637  }
4638 #endif
4639 
4640 
4641  // The idea behind this sparse assembly routine is to use a vector of
4642  // maps for the entries in each row or column of the complete matrix.
4643  // The key for each map is the global row or column number and
4644  // the default comparison operator for integers means that each map
4645  // is ordered by the global row or column number. Thus, we need not
4646  // sort the maps, that happens at each insertion of a new entry. The
4647  // price we pay is that for large maps, insertion is not a
4648  // cheap operation. Hash maps can be used to increase the speed, but then
4649  // the ordering is lost and we would have to sort anyway. The solution if
4650  // speed is required is to use lists, see below.
4651 
4652 
4653  // Set up a vector of vectors of maps of entries of each matrix,
4654  // indexed by either the column or row. The entries of the vector for
4655  // each matrix correspond to all the rows or columns of that matrix.
4656  // The use of the map storage
4657  // scheme, with its implicit ordering on the first index, gives
4658  // a sparse ordered list of the entries in the given row or column.
4659  Vector<Vector<std::map<unsigned, double>>> matrix_data_map(n_matrix);
4660  // Loop over the number of matrices being assembled and resize
4661  // each vector of maps to the number of rows or columns of the matrix
4662  for (unsigned m = 0; m < n_matrix; m++)
4663  {
4664  matrix_data_map[m].resize(ndof);
4665  }
4666 
4667  // Resize the residuals vectors
4668  for (unsigned v = 0; v < n_vector; v++)
4669  {
4670  residuals[v] = new double[ndof];
4671  for (unsigned i = 0; i < ndof; i++)
4672  {
4673  residuals[v][i] = 0;
4674  }
4675  }
4676 
4677 
4678 #ifdef OOMPH_HAS_MPI
4679 
4680 
4681  // Storage for assembly time for elements
4682  double t_assemble_start = 0.0;
4683 
4684  // Storage for assembly times
4685  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
4686  {
4687  Elemental_assembly_time.resize(n_elements);
4688  }
4689 
4690 #endif
4691 
4692  //----------------Assemble and populate the maps-------------------------
4693  {
4694  // Allocate local storage for the element's contribution to the
4695  // residuals vectors and system matrices of the size of the maximum
4696  // number of dofs in any element.
4697  // This means that the storage is only allocated (and deleted) once
4698  Vector<Vector<double>> el_residuals(n_vector);
4699  Vector<DenseMatrix<double>> el_jacobian(n_matrix);
4700 
4701  // Loop over the elements for this processor
4702  for (unsigned long e = el_lo; e <= el_hi; e++)
4703  {
4704 #ifdef OOMPH_HAS_MPI
4705  // Time it?
4706  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
4707  {
4708  t_assemble_start = TimingHelpers::timer();
4709  }
4710 #endif
4711 
4712  // Get the pointer to the element
4713  GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
4714 
4715 #ifdef OOMPH_HAS_MPI
4716  // Ignore halo elements
4717  if (!elem_pt->is_halo())
4718  {
4719 #endif
4720 
4721  // Find number of degrees of freedom in the element
4722  const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
4723 
4724  // Resize the storage for elemental jacobian and residuals
4725  for (unsigned v = 0; v < n_vector; v++)
4726  {
4727  el_residuals[v].resize(nvar);
4728  }
4729  for (unsigned m = 0; m < n_matrix; m++)
4730  {
4731  el_jacobian[m].resize(nvar);
4732  }
4733 
4734  // Now get the residuals and jacobian for the element
4736  elem_pt, el_residuals, el_jacobian);
4737 
4738  //---------------Insert the values into the maps--------------
4739 
4740  // Loop over the first index of local variables
4741  for (unsigned i = 0; i < nvar; i++)
4742  {
4743  // Get the local equation number
4744  unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
4745 
4746  // Add the contribution to the residuals
4747  for (unsigned v = 0; v < n_vector; v++)
4748  {
4749  // Fill in each residuals vector
4750  residuals[v][eqn_number] += el_residuals[v][i];
4751  }
4752 
4753  // Now loop over the other index
4754  for (unsigned j = 0; j < nvar; j++)
4755  {
4756  // Get the number of the unknown
4757  unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
4758 
4759  // Loop over the matrices
4760  for (unsigned m = 0; m < n_matrix; m++)
4761  {
4762  // Get the value of the matrix at this point
4763  double value = el_jacobian[m](i, j);
4764  // Only bother to add to the map if it's non-zero
4765  if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
4766  {
4767  // If it's compressed row storage, then our vector of maps
4768  // is indexed by row (equation number)
4769  if (compressed_row_flag)
4770  {
4771  // Add the data into the map using the unknown as the map
4772  // key
4773  matrix_data_map[m][eqn_number][unknown] += value;
4774  }
4775  // Otherwise it's compressed column storage and our vector is
4776  // indexed by column (the unknown)
4777  else
4778  {
4779  // Add the data into the map using the eqn_number as the map
4780  // key
4781  matrix_data_map[m][unknown][eqn_number] += value;
4782  }
4783  }
4784  } // End of loop over matrices
4785  }
4786  }
4787 
4788 #ifdef OOMPH_HAS_MPI
4789  } // endif halo element
4790 #endif
4791 
4792 
4793 #ifdef OOMPH_HAS_MPI
4794  // Time it?
4795  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
4796  {
4797  Elemental_assembly_time[e] =
4798  TimingHelpers::timer() - t_assemble_start;
4799  }
4800 #endif
4801 
4802  } // End of loop over the elements
4803 
4804  } // End of map assembly
4805 
4806 
4807 #ifdef OOMPH_HAS_MPI
4808 
4809  // Postprocess timing information and re-allocate distribution of
4810  // elements during subsequent assemblies.
4811  if ((!doing_residuals) && (!Problem_has_been_distributed) &&
4813  {
4815  }
4816 
4817  // We have determined load balancing for current setup.
4818  // This can remain the same until assign_eqn_numbers() is called
4819  // again -- the flag is re-set to true there.
4820  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
4821  {
4822  Must_recompute_load_balance_for_assembly = false;
4823  }
4824 
4825 #endif
4826 
4827 
4828  //-----------Finally we need to convert the beautiful map storage scheme
4829  //------------------------to the containers required by SuperLU
4830 
4831  // Loop over the number of matrices
4832  for (unsigned m = 0; m < n_matrix; m++)
4833  {
4834  // Set the number of rows or columns
4835  row_or_column_start[m] = new int[ndof + 1];
4836  // Counter for the total number of entries in the storage scheme
4837  unsigned long entry_count = 0;
4838  row_or_column_start[m][0] = entry_count;
4839 
4840  // first we compute the number of non-zeros
4841  nnz[m] = 0;
4842  for (unsigned long i_global = 0; i_global < ndof; i_global++)
4843  {
4844  nnz[m] += matrix_data_map[m][i_global].size();
4845  }
4846 
4847  // and then resize the storage
4848  column_or_row_index[m] = new int[nnz[m]];
4849  value[m] = new double[nnz[m]];
4850 
4851  // Now we merely loop over the number of rows or columns
4852  for (unsigned long i_global = 0; i_global < ndof; i_global++)
4853  {
4854  // Start index for the present row
4855  row_or_column_start[m][i_global] = entry_count;
4856  // If there are no entries in the map then skip the rest of the loop
4857  if (matrix_data_map[m][i_global].empty())
4858  {
4859  continue;
4860  }
4861 
4862  // Loop over all the entries in the map corresponding to the given
4863  // row or column. It will be ordered
4864 
4865  for (std::map<unsigned, double>::iterator it =
4866  matrix_data_map[m][i_global].begin();
4867  it != matrix_data_map[m][i_global].end();
4868  ++it)
4869  {
4870  // The first value is the column or row index
4871  column_or_row_index[m][entry_count] = it->first;
4872  // The second value is the actual data value
4873  value[m][entry_count] = it->second;
4874  // Increase the value of the counter
4875  entry_count++;
4876  }
4877  }
4878 
4879  // Final entry in the row/column start vector
4880  row_or_column_start[m][ndof] = entry_count;
4881  } // End of the loop over the matrices
4882 
4883  if (Pause_at_end_of_sparse_assembly)
4884  {
4885  oomph_info << "Pausing at end of sparse assembly." << std::endl;
4886  pause("Check memory usage now.");
4887  }
4888  }
4889 
4890 
4891  //=====================================================================
4892  /// This is a (private) helper function that is used to assemble system
4893  /// matrices in compressed row or column format
4894  /// and compute residual vectors using lists
4895  /// The default action is to assemble the jacobian matrix and
4896  /// residuals for the Newton method. The action can be
4897  /// overloaded at an elemental level by changing the default
4898  /// behaviour of the function Element::get_all_vectors_and_matrices().
4899  /// column_or_row_index: Column [or row] index of given entry
4900  /// row_or_column_start: Index of first entry for given row [or column]
4901  /// value : Vector of nonzero entries
4902  /// residuals : Residual vector
4903  /// compressed_row_flag: Bool flag to indicate if storage format is
4904  /// compressed row [if false interpretation of
4905  /// arguments is as stated in square brackets].
4906  //=====================================================================
4907  void Problem::sparse_assemble_row_or_column_compressed_with_lists(
4908  Vector<int*>& column_or_row_index,
4909  Vector<int*>& row_or_column_start,
4910  Vector<double*>& value,
4911  Vector<unsigned>& nnz,
4912  Vector<double*>& residuals,
4913  bool compressed_row_flag)
4914  {
4915  // Total number of elements
4916  const unsigned long n_elements = mesh_pt()->nelement();
4917 
4918  // Default range of elements for distributed problems
4919  unsigned long el_lo = 0;
4920  unsigned long el_hi = n_elements - 1;
4921 
4922 #ifdef OOMPH_HAS_MPI
4923  // Otherwise just loop over a fraction of the elements
4924  // (This will either have been initialised in
4925  // Problem::set_default_first_and_last_element_for_assembly() or
4926  // will have been re-assigned during a previous assembly loop
4927  // Note that following the re-assignment only the entries
4928  // for the current processor are relevant.
4930  {
4931  el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
4932  el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
4933  }
4934 #endif
4935 
4936  // number of dofs
4937  unsigned ndof = this->ndof();
4938 
4939  // Find the number of vectors to be assembled
4940  const unsigned n_vector = residuals.size();
4941 
4942  // Find the number of matrices to be assembled
4943  const unsigned n_matrix = column_or_row_index.size();
4944 
4945  // Locally cache pointer to assembly handler
4946  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
4947
4948 #ifdef OOMPH_HAS_MPI
4949  bool doing_residuals = false;
4950  if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
4951  {
4952  doing_residuals = true;
4953  }
4954 #endif
4955 
4956 // Error check dimensions
4957 #ifdef PARANOID
4958  if (row_or_column_start.size() != n_matrix)
4959  {
4960  std::ostringstream error_stream;
4961  error_stream << "Error: " << std::endl
4962  << "row_or_column_start.size() "
4963  << row_or_column_start.size() << " does not equal "
4964  << "column_or_row_index.size() "
4965  << column_or_row_index.size() << std::endl;
4966  throw OomphLibError(
4967  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4968  }
4969 
4970  if (value.size() != n_matrix)
4971  {
4972  std::ostringstream error_stream;
4973  error_stream
4974  << "Error in Problem::sparse_assemble_row_or_column_compressed "
4975  << std::endl
4976  << "value.size() " << value.size() << " does not equal "
4977  << "column_or_row_index.size() " << column_or_row_index.size()
4978  << std::endl
4979  << std::endl
4980  << std::endl;
4981  throw OomphLibError(
4982  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
4983  }
4984 #endif
4985 
4986  // The idea behind this sparse assembly routine is to use a vector of
4987  // lists for the entries in each row or column of the complete matrix.
4988  // The lists contain pairs of entries (global row/column number, value).
4989  // All non-zero contributions from each element are added to the lists.
4990  // We then sort each list by global row/column number and then combine
4991  // the entries corresponding to each row/column before adding to the
4992  // vectors column_or_row_index and value.
4993 
4994  // Note the trade off for "fast assembly" is that we will require
4995  // more memory during the assembly phase. Then again, if we can
4996  // only just assemble the sparse matrix, we're in real trouble.
4997 
4998  // Set up a vector of lists of paired entries of
4999  //(row/column index, jacobian matrix entry).
5000  // The entries of the vector correspond to all the rows or columns.
5001  // The use of the list storage scheme, should give fast insertion
5002  // and fast sorts later.
5003  Vector<Vector<std::list<std::pair<unsigned, double>>>> matrix_data_list(
5004  n_matrix);
5005  // Loop over the number of matrices and resize
5006  for (unsigned m = 0; m < n_matrix; m++)
5007  {
5008  matrix_data_list[m].resize(ndof);
5009  }
5010 
5011  // Resize the residuals vectors
5012  for (unsigned v = 0; v < n_vector; v++)
5013  {
5014  residuals[v] = new double[ndof];
5015  for (unsigned i = 0; i < ndof; i++)
5016  {
5017  residuals[v][i] = 0;
5018  }
5019  }
5020 
5021 #ifdef OOMPH_HAS_MPI
5022 
5023 
5024  // Storage for assembly time for elements
5025  double t_assemble_start = 0.0;
5026 
5027  // Storage for assembly times
5028  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5029  {
5030  Elemental_assembly_time.resize(n_elements);
5031  }
5032 
5033 #endif
5034 
5035  //------------Assemble and populate the lists-----------------------
5036  {
5037  // Allocate local storage for the element's contribution to the
5038  // residuals vectors and system matrices of the size of the maximum
5039  // number of dofs in any element.
5040  // This means that the storage is only allocated (and deleted) once
5041  Vector<Vector<double>> el_residuals(n_vector);
5042  Vector<DenseMatrix<double>> el_jacobian(n_matrix);
5043 
5044 
5045  // Pointer to a single list to be used during the assembly
5046  std::list<std::pair<unsigned, double>>* list_pt;
5047 
5048  // Loop over the all elements
5049  for (unsigned long e = el_lo; e <= el_hi; e++)
5050  {
5051 #ifdef OOMPH_HAS_MPI
5052  // Time it?
5053  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5054  {
5055  t_assemble_start = TimingHelpers::timer();
5056  }
5057 #endif
5058 
5059  // Get the pointer to the element
5060  GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
5061 
5062 #ifdef OOMPH_HAS_MPI
5063  // Ignore halo elements
5064  if (!elem_pt->is_halo())
5065  {
5066 #endif
5067 
5068  // Find number of degrees of freedom in the element
5069  const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
5070 
5071  // Resize the storage for the elemental jacobian and residuals
5072  for (unsigned v = 0; v < n_vector; v++)
5073  {
5074  el_residuals[v].resize(nvar);
5075  }
5076  for (unsigned m = 0; m < n_matrix; m++)
5077  {
5078  el_jacobian[m].resize(nvar);
5079  }
5080 
5081  // Now get the residuals and jacobian for the element
5083  elem_pt, el_residuals, el_jacobian);
5084 
5085  //---------------- Insert the values into the lists -----------
5086 
5087  // Loop over the first index of local variables
5088  for (unsigned i = 0; i < nvar; i++)
5089  {
5090  // Get the local equation number
5091  unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
5092 
5093  // Add the contribution to the residuals
5094  for (unsigned v = 0; v < n_vector; v++)
5095  {
5096  // Fill in the residuals vector
5097  residuals[v][eqn_number] += el_residuals[v][i];
5098  }
5099 
5100  // Now loop over the other index
5101  for (unsigned j = 0; j < nvar; j++)
5102  {
5103  // Get the number of the unknown
5104  unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
5105 
5106  // Loop over the matrices
5107  for (unsigned m = 0; m < n_matrix; m++)
5108  {
5109  // Get the value of the matrix at this point
5110  double value = el_jacobian[m](i, j);
5111  // Only bother to add to the list if it's non-zero
5112  if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
5113  {
5114  // If it's compressed row storage, then our vector is indexed
5115  // by row (the equation number)
5116  if (compressed_row_flag)
5117  {
5118  // Find the list that corresponds to the desired row
5119  list_pt = &matrix_data_list[m][eqn_number];
5120  // Insert the data into the list, the first entry
5121  // in the pair is the unknown (column index),
5122  // the second is the value itself.
5123  list_pt->insert(list_pt->end(),
5124  std::make_pair(unknown, value));
5125  }
5126  // Otherwise it's compressed column storage, and our
5127  // vector is indexed by column (the unknown)
5128  else
5129  {
5130  // Find the list that corresponds to the desired column
5131  list_pt = &matrix_data_list[m][unknown];
5132  // Insert the data into the list, the first entry
5133  // in the pair is the equation number (row index),
5134  // the second is the value itself.
5135  list_pt->insert(list_pt->end(),
5136  std::make_pair(eqn_number, value));
5137  }
5138  }
5139  }
5140  }
5141  }
5142 
5143 #ifdef OOMPH_HAS_MPI
5144  } // endif halo element
5145 #endif
5146 
5147 
5148 #ifdef OOMPH_HAS_MPI
5149  // Time it?
5150  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5151  {
5152  Elemental_assembly_time[e] =
5153  TimingHelpers::timer() - t_assemble_start;
5154  }
5155 #endif
5156 
5157  } // End of loop over the elements
5158 
5159  } // list_pt goes out of scope
5160 
5161 
5162 #ifdef OOMPH_HAS_MPI
5163 
5164  // Postprocess timing information and re-allocate distribution of
5165  // elements during subsequent assemblies.
5166  if ((!doing_residuals) && (!Problem_has_been_distributed) &&
5168  {
5170  }
5171 
5172  // We have determined load balancing for current setup.
5173  // This can remain the same until assign_eqn_numbers() is called
5174  // again -- the flag is re-set to true there.
5175  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5176  {
5177  Must_recompute_load_balance_for_assembly = false;
5178  }
5179 
5180 #endif
5181 
5182 
5183  //----Finally we need to convert the beautiful list storage scheme---
5184  //----------to the containers required by SuperLU--------------------
5185 
5186  // Loop over the number of matrices
5187  for (unsigned m = 0; m < n_matrix; m++)
5188  {
5189  // Set the number of rows or columns
5190  row_or_column_start[m] = new int[ndof + 1];
5191  // Counter for the total number of entries in the storage scheme
5192  unsigned long entry_count = 0;
5193  // The first entry is 0
5194  row_or_column_start[m][0] = entry_count;
5195 
5196  // first we compute the number of non-zeros
5197  nnz[m] = 0;
5198  for (unsigned long i_global = 0; i_global < ndof; i_global++)
5199  {
5200  nnz[m] += matrix_data_list[m][i_global].size();
5201  }
5202 
5203  // and then resize the storage
5204  column_or_row_index[m] = new int[nnz[m]];
5205  value[m] = new double[nnz[m]];
5206 
5207  // Now we merely loop over the number of rows or columns
5208  for (unsigned long i_global = 0; i_global < ndof; i_global++)
5209  {
5210  // Start index for the present row is the number of entries so far
5211  row_or_column_start[m][i_global] = entry_count;
5212  // If there are no entries in the list then skip the loop
5213  if (matrix_data_list[m][i_global].empty())
5214  {
5215  continue;
5216  }
5217 
5218  // Sort the list corresponding to this row or column by the
5219  // column or row index (first entry in the pair).
5220  // This might be inefficient, but we only have to do the sort ONCE
5221  // for each list. This is faster than using a map storage scheme, where
5222  // we are sorting for every insertion (although the map structure
5223  // is cleaner and more memory efficient)
5224  matrix_data_list[m][i_global].sort();
5225 
5226  // Set up an iterator for start of the list
5227  std::list<std::pair<unsigned, double>>::iterator it =
5228  matrix_data_list[m][i_global].begin();
5229 
5230  // Get the first row or column index in the list...
5231  unsigned current_index = it->first;
5232  //...and the corresponding value
5233  double current_value = it->second;
5234 
5235  // Loop over all the entries in the sorted list
5236  // Increase the iterator so that we start at the second entry
5237  for (++it; it != matrix_data_list[m][i_global].end(); ++it)
5238  {
5239  // If the index has not changed, then we must add the contribution
5240  // of the present entry to the value.
5241  // Additionally check that the entry is non-zero
5242  if ((it->first == current_index) &&
5243  (std::fabs(it->second) > Numerical_zero_for_sparse_assembly))
5244  {
5245  current_value += it->second;
5246  }
5247  // Otherwise, we have added all the contributions to the index
5248  // to current_value, so add it to the SuperLU data structure
5249  else
5250  {
5251  // Add the row or column index to the vector
5252  column_or_row_index[m][entry_count] = current_index;
5253  // Add the actual value to the vector
5254  value[m][entry_count] = current_value;
5255  // Increase the counter for the number of entries in each vector
5256  entry_count++;
5257 
5258  // Set the index and value to be those of the current entry in the
5259  // list
5260  current_index = it->first;
5261  current_value = it->second;
5262  }
5263  } // End of loop over all list entries for this global row or column
5264 
5265  // There are TWO special cases to consider.
5266  // If there is only one equation number in the list, then it
5267  // will NOT have been added. We test this case by comparing the
5268  // number of entries with those stored in row_or_column_start[i_global]
5269  // Otherwise
5270  // If the final entry in the list has the same index as the penultimate
5271  // entry, then it will NOT have been added to the SuperLU storage scheme
5272  // Check this by comparing the current_index with the final index
5273  // stored in the SuperLU scheme. If they are not the same, then
5274  // add the current_index and value.
5275 
5276  // If single equation number in list
5277  if ((static_cast<int>(entry_count) == row_or_column_start[m][i_global])
5278  // If we have a single equation number, this will not be evaluated.
5279  // If we don't then we do the test to check that the final
5280  // entry is added
5281  || (static_cast<int>(current_index) !=
5282  column_or_row_index[m][entry_count - 1]))
5283  {
5284  // Add the row or column index to the vector
5285  column_or_row_index[m][entry_count] = current_index;
5286  // Add the actual value to the vector
5287  value[m][entry_count] = current_value;
5288  // Increase the counter for the number of entries in each vector
5289  entry_count++;
5290  }
5291 
5292  } // End of loop over the rows or columns of the entire matrix
5293 
5294  // Final entry in the row/column start vector
5295  row_or_column_start[m][ndof] = entry_count;
5296  } // End of loop over matrices
5297 
5298  if (Pause_at_end_of_sparse_assembly)
5299  {
5300  oomph_info << "Pausing at end of sparse assembly." << std::endl;
5301  pause("Check memory usage now.");
5302  }
5303  }
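
  // ---------------------------------------------------------------------
  // Editor's sketch (not part of problem.cc): a minimal, self-contained
  // version of the sort-and-merge step used above. The unsorted
  // (column index, value) pairs collected for one row are sorted once and
  // repeated column indices are accumulated into a single entry before
  // being appended to the compressed-row arrays.
  // ---------------------------------------------------------------------
  inline void example_merge_row_entries(
    std::list<std::pair<unsigned, double>>& row_entries,
    Vector<int>& column_index,
    Vector<double>& value)
  {
    if (row_entries.empty()) return;

    // Sort by column index (the first member of each pair)
    row_entries.sort();

    std::list<std::pair<unsigned, double>>::iterator it = row_entries.begin();
    unsigned current_index = it->first;
    double current_value = it->second;

    for (++it; it != row_entries.end(); ++it)
    {
      if (it->first == current_index)
      {
        // Same column: accumulate the contribution
        current_value += it->second;
      }
      else
      {
        // New column: flush the completed entry and start the next one
        column_index.push_back(current_index);
        value.push_back(current_value);
        current_index = it->first;
        current_value = it->second;
      }
    }
    // Flush the final entry
    column_index.push_back(current_index);
    value.push_back(current_value);
  }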
5304 
5305 
5306  //=====================================================================
5307  /// This is a (private) helper function that is used to assemble system
5308  /// matrices in compressed row or column format
5309  /// and compute residual vectors using vectors of pairs
5310  /// The default action is to assemble the jacobian matrix and
5311  /// residuals for the Newton method. The action can be
5312  /// overloaded at an elemental level by changing the default
5313  /// behaviour of the function Element::get_all_vectors_and_matrices().
5314  /// column_or_row_index: Column [or row] index of given entry
5315  /// row_or_column_start: Index of first entry for given row [or column]
5316  /// value : Vector of nonzero entries
5317  /// residuals : Residual vector
5318  /// compressed_row_flag: Bool flag to indicate if storage format is
5319  /// compressed row [if false interpretation of
5320  /// arguments is as stated in square brackets].
5321  //=====================================================================
5322  void Problem::sparse_assemble_row_or_column_compressed_with_vectors_of_pairs(
5323  Vector<int*>& column_or_row_index,
5324  Vector<int*>& row_or_column_start,
5325  Vector<double*>& value,
5326  Vector<unsigned>& nnz,
5327  Vector<double*>& residuals,
5328  bool compressed_row_flag)
5329  {
5330  // Total number of elements
5331  const unsigned long n_elements = mesh_pt()->nelement();
5332 
5333  // Default range of elements for distributed problems
5334  unsigned long el_lo = 0;
5335  unsigned long el_hi = n_elements - 1;
5336 
5337 #ifdef OOMPH_HAS_MPI
5338  // Otherwise just loop over a fraction of the elements
5339  // (This will either have been initialised in
5340  // Problem::set_default_first_and_last_element_for_assembly() or
5341  // will have been re-assigned during a previous assembly loop
5342  // Note that following the re-assignment only the entries
5343  // for the current processor are relevant.
5344  if (!Problem_has_been_distributed)
5345  {
5346  el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
5347  el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
5348  }
5349 #endif
5350 
5351  // number of local eqns
5352  unsigned ndof = this->ndof();
5353 
5354  // Find the number of vectors to be assembled
5355  const unsigned n_vector = residuals.size();
5356 
5357  // Find the number of matrices to be assembled
5358  const unsigned n_matrix = column_or_row_index.size();
5359 
5360  // Locally cache pointer to assembly handler
5361  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
5362 
5363 #ifdef OOMPH_HAS_MPI
5364  bool doing_residuals = false;
5365  if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
5366  {
5367  doing_residuals = true;
5368  }
5369 #endif
5370 
5371 // Error check dimensions
5372 #ifdef PARANOID
5373  if (row_or_column_start.size() != n_matrix)
5374  {
5375  std::ostringstream error_stream;
5376  error_stream << "Error: " << std::endl
5377  << "row_or_column_start.size() "
5378  << row_or_column_start.size() << " does not equal "
5379  << "column_or_row_index.size() "
5380  << column_or_row_index.size() << std::endl;
5381  throw OomphLibError(
5382  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
5383  }
5384 
5385  if (value.size() != n_matrix)
5386  {
5387  std::ostringstream error_stream;
5388  error_stream << "Error: " << std::endl
5389  << "value.size() " << value.size() << " does not equal "
5390  << "column_or_row_index.size() "
5391  << column_or_row_index.size() << std::endl
5392  << std::endl
5393  << std::endl;
5394  throw OomphLibError(
5395  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
5396  }
5397 #endif
5398 
5399 
5400  // The idea behind this sparse assembly routine is to use a Vector of
5401  // Vectors of pairs for each complete matrix.
5402  // Each inner Vector stores pairs and holds the row (or column) index
5403  // and the value of the matrix entry.
5404 
5405  // Set up Vector of Vectors to store the entries of each matrix,
5406  // indexed by either the column or row.
5407  Vector<Vector<Vector<std::pair<unsigned, double>>>> matrix_data(n_matrix);
5408 
5409  // Loop over the number of matrices being assembled and resize
5410  // each Vector of Vectors to the number of rows or columns of the matrix
5411  for (unsigned m = 0; m < n_matrix; m++)
5412  {
5413  matrix_data[m].resize(ndof);
5414  }
5415 
5416  // Resize the residuals vectors
5417  for (unsigned v = 0; v < n_vector; v++)
5418  {
5419  residuals[v] = new double[ndof];
5420  for (unsigned i = 0; i < ndof; i++)
5421  {
5422  residuals[v][i] = 0;
5423  }
5424  }
5425 
5426 #ifdef OOMPH_HAS_MPI
5427 
5428  // Storage for assembly time for elements
5429  double t_assemble_start = 0.0;
5430 
5431  // Storage for assembly times
5432  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5433  {
5434  Elemental_assembly_time.resize(n_elements);
5435  }
5436 
5437 #endif
5438 
5439  //----------------Assemble and populate the vector storage scheme--------
5440  {
5441  // Allocate local storage for the element's contribution to the
5442  // residuals vectors and system matrices of the size of the maximum
5443  // number of dofs in any element
5444  // This means that the storage is only allocated (and deleted) once
5445  Vector<Vector<double>> el_residuals(n_vector);
5446  Vector<DenseMatrix<double>> el_jacobian(n_matrix);
5447 
5448  // Loop over the elements
5449  for (unsigned long e = el_lo; e <= el_hi; e++)
5450  {
5451 #ifdef OOMPH_HAS_MPI
5452  // Time it?
5453  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5454  {
5455  t_assemble_start = TimingHelpers::timer();
5456  }
5457 #endif
5458 
5459  // Get the pointer to the element
5460  GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
5461 
5462 #ifdef OOMPH_HAS_MPI
5463  // Ignore halo elements
5464  if (!elem_pt->is_halo())
5465  {
5466 #endif
5467 
5468  // Find number of degrees of freedom in the element
5469  const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
5470 
5471  // Resize the storage for elemental jacobian and residuals
5472  for (unsigned v = 0; v < n_vector; v++)
5473  {
5474  el_residuals[v].resize(nvar);
5475  }
5476  for (unsigned m = 0; m < n_matrix; m++)
5477  {
5478  el_jacobian[m].resize(nvar);
5479  }
5480 
5481  // Now get the residuals and jacobian for the element
5482  assembly_handler_pt->get_all_vectors_and_matrices(
5483  elem_pt, el_residuals, el_jacobian);
5484 
5485  //---------------Insert the values into the vectors--------------
5486 
5487  // Loop over the first index of local variables
5488  for (unsigned i = 0; i < nvar; i++)
5489  {
5490  // Get the local equation number
5491  unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
5492 
5493  // Add the contribution to the residuals
5494  for (unsigned v = 0; v < n_vector; v++)
5495  {
5496  // Fill in each residuals vector
5497  residuals[v][eqn_number] += el_residuals[v][i];
5498  }
5499 
5500  // Now loop over the other index
5501  for (unsigned j = 0; j < nvar; j++)
5502  {
5503  // Get the number of the unknown
5504  unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
5505 
5506  // Loop over the matrices
5507  // If it's compressed row storage, then our vector of maps
5508  // is indexed by row (equation number)
5509  for (unsigned m = 0; m < n_matrix; m++)
5510  {
5511  // Get the value of the matrix at this point
5512  double value = el_jacobian[m](i, j);
5513  // Only bother to add to the vector if it's non-zero
5514  if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
5515  {
5516  // If it's compressed row storage, then our vector of maps
5517  // is indexed by row (equation number)
5518  if (compressed_row_flag)
5519  {
5520  // Find the correct position and add the data into the
5521  // vectors
5522  const unsigned size = matrix_data[m][eqn_number].size();
5523  for (unsigned k = 0; k <= size; k++)
5524  {
5525  if (k == size)
5526  {
5527  matrix_data[m][eqn_number].push_back(
5528  std::make_pair(unknown, value));
5529  break;
5530  }
5531  else if (matrix_data[m][eqn_number][k].first == unknown)
5532  {
5533  matrix_data[m][eqn_number][k].second += value;
5534  break;
5535  }
5536  }
5537  }
5538  // Otherwise it's compressed column storage and our vector is
5539  // indexed by column (the unknown)
5540  else
5541  {
5542  // Add the data into the vectors in the correct position
5543  const unsigned size = matrix_data[m][unknown].size();
5544  for (unsigned k = 0; k <= size; k++)
5545  {
5546  if (k == size)
5547  {
5548  matrix_data[m][unknown].push_back(
5549  std::make_pair(eqn_number, value));
5550  break;
5551  }
5552  else if (matrix_data[m][unknown][k].first == eqn_number)
5553  {
5554  matrix_data[m][unknown][k].second += value;
5555  break;
5556  }
5557  }
5558  }
5559  }
5560  } // End of loop over matrices
5561  }
5562  }
5563 
5564 #ifdef OOMPH_HAS_MPI
5565  } // endif halo element
5566 #endif
5567 
5568 
5569 #ifdef OOMPH_HAS_MPI
5570  // Time it?
5571  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5572  {
5573  Elemental_assembly_time[e] =
5574  TimingHelpers::timer() - t_assemble_start;
5575  }
5576 #endif
5577 
5578  } // End of loop over the elements
5579 
5580 
5581  } // End of vector assembly
5582 
5583 
5584 #ifdef OOMPH_HAS_MPI
5585 
5586  // Postprocess timing information and re-allocate distribution of
5587  // elements during subsequent assemblies.
5588  if ((!doing_residuals) && (!Problem_has_been_distributed) &&
5589  Must_recompute_load_balance_for_assembly)
5590  {
5591  recompute_load_balanced_assembly();
5592  }
5593 
5594  // We have determined load balancing for current setup.
5595  // This can remain the same until assign_eqn_numbers() is called
5596  // again -- the flag is re-set to true there.
5597  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5598  {
5599  Must_recompute_load_balance_for_assembly = false;
5600  }
5601 
5602 #endif
5603 
5604 
5605  //-----------Finally we need to convert this vector storage scheme
5606  //------------------------to the containers required by SuperLU
5607 
5608  // Loop over the number of matrices
5609  for (unsigned m = 0; m < n_matrix; m++)
5610  {
5611  // Set the number of rows or columns
5612  row_or_column_start[m] = new int[ndof + 1];
5613 
5614  // fill row_or_column_start and find the number of entries
5615  row_or_column_start[m][0] = 0;
5616  for (unsigned long i = 0; i < ndof; i++)
5617  {
5618  row_or_column_start[m][i + 1] =
5619  row_or_column_start[m][i] + matrix_data[m][i].size();
5620  }
5621  const unsigned entries = row_or_column_start[m][ndof];
5622 
5623  // resize vectors
5624  column_or_row_index[m] = new int[entries];
5625  value[m] = new double[entries];
5626  nnz[m] = entries;
5627 
5628  // Now we merely loop over the number of rows or columns
5629  for (unsigned long i_global = 0; i_global < ndof; i_global++)
5630  {
5631  // If there are no entries in the vector then skip the rest of the loop
5632  if (matrix_data[m][i_global].empty())
5633  {
5634  continue;
5635  }
5636 
5637  // Loop over all the entries in the vectors corresponding to the given
5638  // row or column. It will NOT be ordered
5639  unsigned p = 0;
5640  for (int j = row_or_column_start[m][i_global];
5641  j < row_or_column_start[m][i_global + 1];
5642  j++)
5643  {
5644  column_or_row_index[m][j] = matrix_data[m][i_global][p].first;
5645  value[m][j] = matrix_data[m][i_global][p].second;
5646  ++p;
5647  }
5648  }
5649  } // End of the loop over the matrices
5650 
5651  if (Pause_at_end_of_sparse_assembly)
5652  {
5653  oomph_info << "Pausing at end of sparse assembly." << std::endl;
5654  pause("Check memory usage now.");
5655  }
5656  }
5657 
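// A minimal standalone sketch (illustrative names only, not oomph-lib API)
// of the final conversion performed above: the per-row vectors of
// (index, value) pairs are flattened into the three compressed-row arrays
// expected by SuperLU. The row starts are the running (prefix) sums of the
// per-row entry counts; the entries are then copied row by row.
#include <utility>
#include <vector>

void pairs_to_compressed_rows(
  const std::vector<std::vector<std::pair<unsigned, double>>>& rows,
  std::vector<int>& row_start,
  std::vector<int>& column_index,
  std::vector<double>& value)
{
  const std::size_t n_row = rows.size();

  // Prefix sum of the per-row sizes gives the row-start offsets
  row_start.assign(n_row + 1, 0);
  for (std::size_t i = 0; i < n_row; i++)
  {
    row_start[i + 1] = row_start[i] + static_cast<int>(rows[i].size());
  }

  // Allocate the flat storage and copy the entries row by row
  column_index.resize(row_start[n_row]);
  value.resize(row_start[n_row]);
  for (std::size_t i = 0; i < n_row; i++)
  {
    int j = row_start[i];
    for (std::size_t p = 0; p < rows[i].size(); p++)
    {
      column_index[j] = static_cast<int>(rows[i][p].first);
      value[j] = rows[i][p].second;
      ++j;
    }
  }
}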
5658 
5659  //=====================================================================
5660  /// This is a (private) helper function that is used to assemble system
5661  /// matrices in compressed row or column format
5662  /// and compute residual vectors using two vectors.
5663  /// The default action is to assemble the jacobian matrix and
5664  /// residuals for the Newton method. The action can be
5665  /// overloaded at an elemental level by changing the default
5666  /// behaviour of the function Element::get_all_vectors_and_matrices().
5667  /// column_or_row_index: Column [or row] index of given entry
5668  /// row_or_column_start: Index of first entry for given row [or column]
5669  /// value : Vector of nonzero entries
5670  /// residuals : Residual vector
5671  /// compressed_row_flag: Bool flag to indicate if storage format is
5672  /// compressed row [if false interpretation of
5673  /// arguments is as stated in square brackets].
5674  //=====================================================================
5675  void Problem::sparse_assemble_row_or_column_compressed_with_two_vectors(
5676  Vector<int*>& column_or_row_index,
5677  Vector<int*>& row_or_column_start,
5678  Vector<double*>& value,
5679  Vector<unsigned>& nnz,
5680  Vector<double*>& residuals,
5681  bool compressed_row_flag)
5682  {
5683  // Total number of elements
5684  const unsigned long n_elements = mesh_pt()->nelement();
5685 
5686  // Default range of elements for distributed problems
5687  unsigned long el_lo = 0;
5688  unsigned long el_hi = n_elements - 1;
5689 
5690 
5691 #ifdef OOMPH_HAS_MPI
5692  // Otherwise just loop over a fraction of the elements
5693  // (This will either have been initialised in
5694  // Problem::set_default_first_and_last_element_for_assembly() or
5695  // will have been re-assigned during a previous assembly loop
5696  // Note that following the re-assignment only the entries
5697  // for the current processor are relevant.
5698  if (!Problem_has_been_distributed)
5699  {
5700  el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
5701  el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
5702  }
5703 #endif
5704 
5705  // number of local eqns
5706  unsigned ndof = this->ndof();
5707 
5708  // Find the number of vectors to be assembled
5709  const unsigned n_vector = residuals.size();
5710 
5711  // Find the number of matrices to be assembled
5712  const unsigned n_matrix = column_or_row_index.size();
5713 
5714  // Locally cache pointer to assembly handler
5715  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
5716 
5717 #ifdef OOMPH_HAS_MPI
5718  bool doing_residuals = false;
5719  if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
5720  {
5721  doing_residuals = true;
5722  }
5723 #endif
5724 
5725 // Error check dimensions
5726 #ifdef PARANOID
5727  if (row_or_column_start.size() != n_matrix)
5728  {
5729  std::ostringstream error_stream;
5730  error_stream << "Error: " << std::endl
5731  << "row_or_column_start.size() "
5732  << row_or_column_start.size() << " does not equal "
5733  << "column_or_row_index.size() "
5734  << column_or_row_index.size() << std::endl;
5735  throw OomphLibError(
5736  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
5737  }
5738 
5739  if (value.size() != n_matrix)
5740  {
5741  std::ostringstream error_stream;
5742  error_stream << "Error: " << std::endl
5743  << "value.size() " << value.size() << " does not equal "
5744  << "column_or_row_index.size() "
5745  << column_or_row_index.size() << std::endl
5746  << std::endl
5747  << std::endl;
5748  throw OomphLibError(
5749  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
5750  }
5751 #endif
5752 
5753  // The idea behind this sparse assembly routine is to use Vectors of
5754  // Vectors for the entries in each complete matrix, while a second
5755  // Vector of Vectors stores the global row (or column) indices. This
5756  // will not have the memory overheads associated with the methods using
5757  // lists or maps, but insertion will be more costly.
5758 
5759  // Set up two Vectors of Vectors to store the entries of each matrix,
5760  // indexed by either the column or row. The entries of the vector for
5761  // each matrix correspond to all the rows or columns of that matrix.
5762  Vector<Vector<Vector<unsigned>>> matrix_row_or_col_indices(n_matrix);
5763  Vector<Vector<Vector<double>>> matrix_values(n_matrix);
5764 
5765  // Loop over the number of matrices being assembled and resize
5766  // each vector of vectors to the number of rows or columns of the matrix
5767  for (unsigned m = 0; m < n_matrix; m++)
5768  {
5769  matrix_row_or_col_indices[m].resize(ndof);
5770  matrix_values[m].resize(ndof);
5771  }
5772 
5773  // Resize the residuals vectors
5774  for (unsigned v = 0; v < n_vector; v++)
5775  {
5776  residuals[v] = new double[ndof];
5777  for (unsigned i = 0; i < ndof; i++)
5778  {
5779  residuals[v][i] = 0;
5780  }
5781  }
5782 
5783 #ifdef OOMPH_HAS_MPI
5784 
5785  // Storage for assembly time for elements
5786  double t_assemble_start = 0.0;
5787 
5788  // Storage for assembly times
5789  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5790  {
5791  Elemental_assembly_time.resize(n_elements);
5792  }
5793 
5794 #endif
5795 
5796 
5797  //----------------Assemble and populate the vector storage scheme-------
5798  {
5799  // Allocate local storage for the element's contribution to the
5800  // residuals vectors and system matrices of the size of the maximum
5801  // number of dofs in any element
5802  // This means that the storage will only be allocated (and deleted) once
5803  Vector<Vector<double>> el_residuals(n_vector);
5804  Vector<DenseMatrix<double>> el_jacobian(n_matrix);
5805 
5806  // Loop over the elements
5807  for (unsigned long e = el_lo; e <= el_hi; e++)
5808  {
5809 #ifdef OOMPH_HAS_MPI
5810  // Time it?
5811  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5812  {
5813  t_assemble_start = TimingHelpers::timer();
5814  }
5815 #endif
5816 
5817  // Get the pointer to the element
5818  GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
5819 
5820 #ifdef OOMPH_HAS_MPI
5821  // Ignore halo elements
5822  if (!elem_pt->is_halo())
5823  {
5824 #endif
5825 
5826  // Find number of degrees of freedom in the element
5827  const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
5828 
5829  // Resize the storage for elemental jacobian and residuals
5830  for (unsigned v = 0; v < n_vector; v++)
5831  {
5832  el_residuals[v].resize(nvar);
5833  }
5834  for (unsigned m = 0; m < n_matrix; m++)
5835  {
5836  el_jacobian[m].resize(nvar);
5837  }
5838 
5839  // Now get the residuals and jacobian for the element
5840  assembly_handler_pt->get_all_vectors_and_matrices(
5841  elem_pt, el_residuals, el_jacobian);
5842 
5843  //---------------Insert the values into the vectors--------------
5844 
5845  // Loop over the first index of local variables
5846  for (unsigned i = 0; i < nvar; i++)
5847  {
5848  // Get the local equation number
5849  unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
5850 
5851  // Add the contribution to the residuals
5852  for (unsigned v = 0; v < n_vector; v++)
5853  {
5854  // Fill in each residuals vector
5855  residuals[v][eqn_number] += el_residuals[v][i];
5856  }
5857 
5858  // Now loop over the other index
5859  for (unsigned j = 0; j < nvar; j++)
5860  {
5861  // Get the number of the unknown
5862  unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
5863 
5864  // Loop over the matrices
5865  // If it's compressed row storage, then our vector of maps
5866  // is indexed by row (equation number)
5867  for (unsigned m = 0; m < n_matrix; m++)
5868  {
5869  // Get the value of the matrix at this point
5870  double value = el_jacobian[m](i, j);
5871  // Only bother to add to the vector if it's non-zero
5872  if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
5873  {
5874  // If it's compressed row storage, then our vector of maps
5875  // is indexed by row (equation number)
5876  if (compressed_row_flag)
5877  {
5878  // Find the correct position and add the data into the
5879  // vectors
5880  const unsigned size =
5881  matrix_row_or_col_indices[m][eqn_number].size();
5882 
5883  for (unsigned k = 0; k <= size; k++)
5884  {
5885  if (k == size)
5886  {
5887  matrix_row_or_col_indices[m][eqn_number].push_back(
5888  unknown);
5889  matrix_values[m][eqn_number].push_back(value);
5890  break;
5891  }
5892  else if (matrix_row_or_col_indices[m][eqn_number][k] ==
5893  unknown)
5894  {
5895  matrix_values[m][eqn_number][k] += value;
5896  break;
5897  }
5898  }
5899  }
5900  // Otherwise it's compressed column storage and our vector is
5901  // indexed by column (the unknown)
5902  else
5903  {
5904  // Add the data into the vectors in the correct position
5905  const unsigned size =
5906  matrix_row_or_col_indices[m][unknown].size();
5907  for (unsigned k = 0; k <= size; k++)
5908  {
5909  if (k == size)
5910  {
5911  matrix_row_or_col_indices[m][unknown].push_back(
5912  eqn_number);
5913  matrix_values[m][unknown].push_back(value);
5914  break;
5915  }
5916  else if (matrix_row_or_col_indices[m][unknown][k] ==
5917  eqn_number)
5918  {
5919  matrix_values[m][unknown][k] += value;
5920  break;
5921  }
5922  }
5923  }
5924  }
5925  } // End of loop over matrices
5926  }
5927  }
5928 
5929 #ifdef OOMPH_HAS_MPI
5930  } // endif halo element
5931 #endif
5932 
5933 
5934 #ifdef OOMPH_HAS_MPI
5935  // Time it?
5936  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5937  {
5938  Elemental_assembly_time[e] =
5939  TimingHelpers::timer() - t_assemble_start;
5940  }
5941 #endif
5942 
5943  } // End of loop over the elements
5944 
5945  } // End of vector assembly
5946 
5947 
5948 #ifdef OOMPH_HAS_MPI
5949 
5950  // Postprocess timing information and re-allocate distribution of
5951  // elements during subsequent assemblies.
5952  if ((!doing_residuals) && (!Problem_has_been_distributed) &&
5953  Must_recompute_load_balance_for_assembly)
5954  {
5955  recompute_load_balanced_assembly();
5956  }
5957 
5958  // We have determined load balancing for current setup.
5959  // This can remain the same until assign_eqn_numbers() is called
5960  // again -- the flag is re-set to true there.
5961  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
5962  {
5963  Must_recompute_load_balance_for_assembly = false;
5964  }
5965 
5966 #endif
5967 
5968  //-----------Finally we need to convert this vector storage scheme
5969  //------------------------to the containers required by SuperLU
5970 
5971  // Loop over the number of matrices
5972  for (unsigned m = 0; m < n_matrix; m++)
5973  {
5974  // Set the number of rows or columns
5975  row_or_column_start[m] = new int[ndof + 1];
5976 
5977  // fill row_or_column_start and find the number of entries
5978  row_or_column_start[m][0] = 0;
5979  for (unsigned long i = 0; i < ndof; i++)
5980  {
5981  row_or_column_start[m][i + 1] =
5982  row_or_column_start[m][i] + matrix_values[m][i].size();
5983  }
5984  const unsigned entries = row_or_column_start[m][ndof];
5985 
5986  // resize vectors
5987  column_or_row_index[m] = new int[entries];
5988  value[m] = new double[entries];
5989  nnz[m] = entries;
5990 
5991  // Now we merely loop over the number of rows or columns
5992  for (unsigned long i_global = 0; i_global < ndof; i_global++)
5993  {
5994  // If there are no entries in the vector then skip the rest of the loop
5995  if (matrix_values[m][i_global].empty())
5996  {
5997  continue;
5998  }
5999 
6000  // Loop over all the entries in the vectors corresponding to the given
6001  // row or column. It will NOT be ordered
6002  unsigned p = 0;
6003  for (int j = row_or_column_start[m][i_global];
6004  j < row_or_column_start[m][i_global + 1];
6005  j++)
6006  {
6007  column_or_row_index[m][j] = matrix_row_or_col_indices[m][i_global][p];
6008  value[m][j] = matrix_values[m][i_global][p];
6009  ++p;
6010  }
6011  }
6012  } // End of the loop over the matrices
6013 
6014  if (Pause_at_end_of_sparse_assembly)
6015  {
6016  oomph_info << "Pausing at end of sparse assembly." << std::endl;
6017  pause("Check memory usage now.");
6018  }
6019  }
6020 
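// A minimal standalone sketch (illustrative names only, not oomph-lib API)
// of the insertion strategy used by the "two vectors" variant above: each
// row keeps two parallel vectors, one of column indices and one of values.
// A coefficient is added by a linear search over the existing indices; if
// the column is already present the value is accumulated, otherwise both
// vectors grow by one entry in lock-step.
#include <vector>

void add_coefficient(std::vector<unsigned>& row_columns,
                     std::vector<double>& row_values,
                     unsigned column,
                     double contribution)
{
  const std::size_t n_existing = row_columns.size();
  for (std::size_t k = 0; k < n_existing; k++)
  {
    if (row_columns[k] == column)
    {
      // Duplicate column: accumulate the contribution
      row_values[k] += contribution;
      return;
    }
  }

  // New column: append the index and the value
  row_columns.push_back(column);
  row_values.push_back(contribution);
}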
6021 
6022  //=====================================================================
6023  /// This is a (private) helper function that is used to assemble system
6024  /// matrices in compressed row or column format
6025  /// and compute residual vectors using two arrays.
6026  /// The default action is to assemble the jacobian matrix and
6027  /// residuals for the Newton method. The action can be
6028  /// overloaded at an elemental level by changing the default
6029  /// behaviour of the function Element::get_all_vectors_and_matrices().
6030  /// column_or_row_index: Column [or row] index of given entry
6031  /// row_or_column_start: Index of first entry for given row [or column]
6032  /// value : Vector of nonzero entries
6033  /// residuals : Residual vector
6034  /// compressed_row_flag: Bool flag to indicate if storage format is
6035  /// compressed row [if false interpretation of
6036  /// arguments is as stated in square brackets].
6037  //=====================================================================
6038  void Problem::sparse_assemble_row_or_column_compressed_with_two_arrays(
6039  Vector<int*>& column_or_row_index,
6040  Vector<int*>& row_or_column_start,
6041  Vector<double*>& value,
6042  Vector<unsigned>& nnz,
6043  Vector<double*>& residuals,
6044  bool compressed_row_flag)
6045  {
6046  // Total number of elements
6047  const unsigned long n_elements = mesh_pt()->nelement();
6048 
6049  // Default range of elements for distributed problems
6050  unsigned long el_lo = 0;
6051  unsigned long el_hi = n_elements - 1;
6052 
6053 
6054 #ifdef OOMPH_HAS_MPI
6055  // Otherwise just loop over a fraction of the elements
6056  // (This will either have been initialised in
6057  // Problem::set_default_first_and_last_element_for_assembly() or
6058  // will have been re-assigned during a previous assembly loop
6059  // Note that following the re-assignment only the entries
6060  // for the current processor are relevant.
6061  if (!Problem_has_been_distributed)
6062  {
6063  el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
6064  el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
6065  }
6066 #endif
6067 
6068  // number of local eqns
6069  unsigned ndof = this->ndof();
6070 
6071  // Find the number of vectors to be assembled
6072  const unsigned n_vector = residuals.size();
6073 
6074  // Find the number of matrices to be assembled
6075  const unsigned n_matrix = column_or_row_index.size();
6076 
6077  // Locally cache pointer to assembly handler
6078  AssemblyHandler* const assembly_handler_pt = this->assembly_handler_pt();
6079 
6080 #ifdef OOMPH_HAS_MPI
6081  bool doing_residuals = false;
6082  if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
6083  {
6084  doing_residuals = true;
6085  }
6086 #endif
6087 
6088 // Error check dimensions
6089 #ifdef PARANOID
6090  if (row_or_column_start.size() != n_matrix)
6091  {
6092  std::ostringstream error_stream;
6093  error_stream << "Error: " << std::endl
6094  << "row_or_column_start.size() "
6095  << row_or_column_start.size() << " does not equal "
6096  << "column_or_row_index.size() "
6097  << column_or_row_index.size() << std::endl;
6098  throw OomphLibError(
6099  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
6100  }
6101 
6102  if (value.size() != n_matrix)
6103  {
6104  std::ostringstream error_stream;
6105  error_stream << "Error: " << std::endl
6106  << "value.size() " << value.size() << " does not equal "
6107  << "column_or_row_index.size() "
6108  << column_or_row_index.size() << std::endl
6109  << std::endl
6110  << std::endl;
6111  throw OomphLibError(
6112  error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
6113  }
6114 #endif
6115 
6116  // The idea behind this sparse assembly routine is to use arrays for
6117  // the entries in each complete matrix, while a second set of arrays
6118  // stores the global row (or column) indices. This
6119  // will not have the memory overheads associated with the methods using
6120  // lists or maps, but insertion will be more costly.
6121 
6122  // Set up two sets of arrays to store the entries of each matrix,
6123  // indexed by either the column or row. The arrays for
6124  // each matrix correspond to all the rows or columns of that matrix.
6125  Vector<unsigned**> matrix_row_or_col_indices(n_matrix);
6126  Vector<double**> matrix_values(n_matrix);
6127 
6128  // Loop over the number of matrices being assembled and resize
6129  // each vector of vectors to the number of rows or columns of the matrix
6130  for (unsigned m = 0; m < n_matrix; m++)
6131  {
6132  matrix_row_or_col_indices[m] = new unsigned*[ndof];
6133  matrix_values[m] = new double*[ndof];
6134  }
6135 
6136  // Resize the residuals vectors
6137  for (unsigned v = 0; v < n_vector; v++)
6138  {
6139  residuals[v] = new double[ndof];
6140  for (unsigned i = 0; i < ndof; i++)
6141  {
6142  residuals[v][i] = 0;
6143  }
6144  }
6145 
6146 #ifdef OOMPH_HAS_MPI
6147 
6148  // Storage for assembly time for elements
6149  double t_assemble_start = 0.0;
6150 
6151  // Storage for assembly times
6152  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
6153  {
6154  Elemental_assembly_time.resize(n_elements);
6155  }
6156 
6157 #endif
6158 
6159  // number of coefficients in each row
6160  Vector<Vector<unsigned>> ncoef(n_matrix);
6161  for (unsigned m = 0; m < n_matrix; m++)
6162  {
6163  ncoef[m].resize(ndof, 0);
6164  }
6165 
6166  if (Sparse_assemble_with_arrays_previous_allocation.size() == 0)
6167  {
6168  Sparse_assemble_with_arrays_previous_allocation.resize(n_matrix);
6169  for (unsigned m = 0; m < n_matrix; m++)
6170  {
6171  Sparse_assemble_with_arrays_previous_allocation[m].resize(ndof, 0);
6172  }
6173  }
6174 
6175  //----------------Assemble and populate the vector storage scheme-------
6176  {
6177  // Allocate local storage for the element's contribution to the
6178  // residuals vectors and system matrices of the size of the maximum
6179  // number of dofs in any element
6180  // This means that the storage will only be allocated (and deleted) once
6181  Vector<Vector<double>> el_residuals(n_vector);
6182  Vector<DenseMatrix<double>> el_jacobian(n_matrix);
6183 
6184  // Loop over the elements
6185  for (unsigned long e = el_lo; e <= el_hi; e++)
6186  {
6187 #ifdef OOMPH_HAS_MPI
6188  // Time it?
6189  if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
6190  {
6191  t_assemble_start = TimingHelpers::timer();
6192  }
6193 #endif
6194 
6195  // Get the pointer to the element
6196  GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
6197 
6198 #ifdef OOMPH_HAS_MPI
6199  // Ignore halo elements
6200  if (!elem_pt->is_halo())
6201  {
6202 #endif
6203 
6204  // Find number of degrees of freedom in the element
6205  const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
6206 
6207  // Resize the storage for elemental jacobian and residuals
6208  for (unsigned v = 0; v < n_vector; v++)
6209  {
6210  el_residuals[v].resize(nvar);
6211  }
6212  for (unsigned m = 0; m < n_matrix; m++)
6213  {
6214  el_jacobian[m].resize(nvar);
6215  }
6216 
6217  // Now get the residuals and jacobian for the element
6218  assembly_handler_pt->get_all_vectors_and_matrices(
6219  elem_pt, el_residuals, el_jacobian);
6220 
6221  //---------------Insert the values into the vectors--------------
6222 
6223  // Loop over the first index of local variables
6224  for (unsigned i = 0; i < nvar; i++)
6225  {
6226  // Get the local equation number
6227  unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
6228 
6229  // Add the contribution to the residuals
6230  for (unsigned v = 0; v < n_vector; v++)
6231  {
6232  // Fill in each residuals vector
6233  residuals[v][eqn_number] += el_residuals[v][i];
6234  }
6235 
6236  // Now loop over the other index
6237  for (unsigned j = 0; j < nvar; j++)
6238  {
6239  // Get the number of the unknown
6240  unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
6241 
6242  // Loop over the matrices
6243  // If it's compressed row storage, then our vector of maps
6244  // is indexed by row (equation number)
6245  for (unsigned m = 0; m < n_matrix; m++)
6246  {
6247  // Get the value of the matrix at this point
6248  double value = el_jacobian[m](i, j);
6249  // Only bother to add to the vector if it's non-zero
6250  if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
6251  {
6252  // number of entries in this row
6253  const unsigned size = ncoef[m][eqn_number];
6254 
6255  // if no data has been allocated for this row then allocate
6256  if (size == 0)
6257  {
6258  // do we have previous allocation data
6259  if (Sparse_assemble_with_arrays_previous_allocation
6260  [m][eqn_number] != 0)
6261  {
6262  matrix_row_or_col_indices[m][eqn_number] = new unsigned
6263  [Sparse_assemble_with_arrays_previous_allocation
6264  [m][eqn_number]];
6265  matrix_values[m][eqn_number] = new double
6266  [Sparse_assemble_with_arrays_previous_allocation
6267  [m][eqn_number]];
6268  }
6269  else
6270  {
6271  matrix_row_or_col_indices[m][eqn_number] = new unsigned
6272  [Sparse_assemble_with_arrays_initial_allocation];
6273  matrix_values[m][eqn_number] = new double
6274  [Sparse_assemble_with_arrays_initial_allocation];
6275  Sparse_assemble_with_arrays_previous_allocation
6276  [m][eqn_number] =
6277  Sparse_assemble_with_arrays_initial_allocation;
6278  }
6279  }
6280 
6281  // If it's compressed row storage, then our vector of maps
6282  // is indexed by row (equation number)
6283  if (compressed_row_flag)
6284  {
6285  // next add the data
6286  for (unsigned k = 0; k <= size; k++)
6287  {
6288  if (k == size)
6289  {
6290  // do we need to allocate more storage
6291  if (Sparse_assemble_with_arrays_previous_allocation
6292  [m][eqn_number] == ncoef[m][eqn_number])
6293  {
6294  unsigned new_allocation =
6295  ncoef[m][eqn_number] +
6296  Sparse_assemble_with_arrays_allocation_increment;
6297  double* new_values = new double[new_allocation];
6298  unsigned* new_indices = new unsigned[new_allocation];
6299  for (unsigned c = 0; c < ncoef[m][eqn_number]; c++)
6300  {
6301  new_values[c] = matrix_values[m][eqn_number][c];
6302  new_indices[c] =
6303  matrix_row_or_col_indices[m][eqn_number][c];
6304  }
6305  delete[] matrix_values[m][eqn_number];
6306  delete[] matrix_row_or_col_indices[m][eqn_number];
6307  matrix_values[m][eqn_number] = new_values;
6308  matrix_row_or_col_indices[m][eqn_number] =
6309  new_indices;
6310  Sparse_assemble_with_arrays_previous_allocation
6311  [m][eqn_number] = new_allocation;
6312  }
6313  // and now add the data
6314  unsigned entry = ncoef[m][eqn_number];
6315  ncoef[m][eqn_number]++;
6316