linear_solver.cc
Go to the documentation of this file.
1// LIC// ====================================================================
2// LIC// This file forms part of oomph-lib, the object-oriented,
3// LIC// multi-physics finite-element library, available
4// LIC// at http://www.oomph-lib.org.
5// LIC//
6// LIC// Copyright (C) 2006-2022 Matthias Heil and Andrew Hazel
7// LIC//
8// LIC// This library is free software; you can redistribute it and/or
9// LIC// modify it under the terms of the GNU Lesser General Public
10// LIC// License as published by the Free Software Foundation; either
11// LIC// version 2.1 of the License, or (at your option) any later version.
12// LIC//
13// LIC// This library is distributed in the hope that it will be useful,
14// LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// LIC// Lesser General Public License for more details.
17// LIC//
18// LIC// You should have received a copy of the GNU Lesser General Public
19// LIC// License along with this library; if not, write to the Free Software
20// LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21// LIC// 02110-1301 USA.
22// LIC//
23// LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
24// LIC//
25// LIC//====================================================================
26// The actual solve functions for dense LU linear solvers.
27
28// Config header generated by autoconfig
29#ifdef HAVE_CONFIG_H
30#include <oomph-lib-config.h>
31#endif
32
33#ifdef OOMPH_HAS_MPI
34#include "mpi.h"
35#endif
36
37// oomph-lib includes
38#include "Vector.h"
39#include "linear_solver.h"
40#include "matrices.h"
41#include "problem.h"
42
43
44namespace oomph
45{
46 //=============================================================================
47 /// Solver: Takes pointer to problem and returns the results Vector
48 /// which contains the solution of the linear system defined by
49 /// the problem's fully assembled Jacobian and residual Vector.
50 //=============================================================================
51 void DenseLU::solve(Problem* const& problem_pt, DoubleVector& result)
52 {
53 // Initialise timer
54 double t_start = TimingHelpers::timer();
55
56 // Find # of degrees of freedom (variables)
57 const unsigned n_dof = problem_pt->ndof();
58
59 // Allocate storage for the residuals vector and the jacobian matrix
60 DoubleVector residuals;
61 DenseDoubleMatrix jacobian(n_dof);
62
63 // initialise timer
64 double t_start_jacobian = TimingHelpers::timer();
65
66 // Get the full jacobian and residuals of the problem
67 problem_pt->get_jacobian(residuals, jacobian);
68
69 // compute jacobian setup time
70 double t_end_jacobian = TimingHelpers::timer();
71 Jacobian_setup_time = t_end_jacobian - t_start_jacobian;
72
73 // Report the time
74 if (Doc_time)
75 {
76 oomph_info << std::endl
77 << "CPU for setup of Dense Jacobian: "
80 << std::endl;
81 }
82
83 // Solve by dense LU decomposition VERY INEFFICIENT!
84 solve(&jacobian, residuals, result);
85
86 // Set the sign of the determinant of the jacobian
88
89 // Finalise/doc timings
90 double t_end = TimingHelpers::timer();
91 double total_time = t_end - t_start;
92 if (Doc_time)
93 {
94 oomph_info << "CPU for DenseLU LinearSolver: "
96 << std::endl
97 << std::endl;
98 }
99 }
100
101
102 //=============================================================================
103 /// Delete the storage that has been allocated for the LU factors, if
104 /// the matrix data is not itself being overwritten.
105 //=============================================================================
107 {
108 // delete the Distribution_pt
109 this->clear_distribution();
110
111 // Clean up the LU factor storage, if it has been allocated
112 // N.B. we don't need to check the index storage as well.
113 if (LU_factors != 0)
114 {
115 // Delete the pointer to the LU factors
116 delete[] LU_factors;
117 // Null out the vector
118 LU_factors = 0;
119 // Delete the pointer to the Index
120 delete[] Index;
121 // Null out
122 Index = 0;
123 }
124 }
125
126 //=============================================================================
127 /// LU decompose the matrix.
128 /// WARNING: this class does not perform any PARANOID checks on the vectors -
129 /// these are all performed in the solve(...) method.
130 //=============================================================================
131 void DenseLU::factorise(DoubleMatrixBase* const& matrix_pt)
132 {
133 // Set the number of unknowns
134 const unsigned long n = matrix_pt->nrow();
135
136 // Small constant
137 const double small_number = 1.0e-20;
138
139 // Vector scaling stores the implicit scaling of each row
140 Vector<double> scaling(n);
141
142 // Integer to store the sign that must multiply the determinant as
143 // a consequence of the row/column interchanges
144 int signature = 1;
145
146 // Loop over rows to get implicit scaling information
147 for (unsigned long i = 0; i < n; i++)
148 {
149 double largest_entry = 0.0;
150 for (unsigned long j = 0; j < n; j++)
151 {
152 double tmp = std::fabs((*matrix_pt)(i, j));
153 if (tmp > largest_entry) largest_entry = tmp;
154 }
155 if (largest_entry == 0.0)
156 {
157 throw OomphLibError(
158 "Singular Matrix", OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
159 }
160 // Save the scaling
161 scaling[i] = 1.0 / largest_entry;
162 }
163
164 // Firsly, we shall delete any previous LU storage.
165 // If the user calls this function twice without changing the matrix
166 // then it is their own inefficiency, not ours (this time).
168
169 // Allocate storage for the LU factors, the index and store
170 // the number of unknowns
171 LU_factors = new double[n * n];
172 Index = new long[n];
173
174 // Now we know that memory has been allocated, copy over
175 // the matrix values
176 unsigned count = 0;
177 for (unsigned long i = 0; i < n; i++)
178 {
179 for (unsigned long j = 0; j < n; j++)
180 {
181 LU_factors[count] = (*matrix_pt)(i, j);
182 ++count;
183 }
184 }
185
186 // Loop over columns
187 for (unsigned long j = 0; j < n; j++)
188 {
189 // Initialise imax
190 unsigned long imax = 0;
191
192 for (unsigned long i = 0; i < j; i++)
193 {
194 double sum = LU_factors[n * i + j];
195 for (unsigned long k = 0; k < i; k++)
196 {
197 sum -= LU_factors[n * i + k] * LU_factors[n * k + j];
198 }
199 LU_factors[n * i + j] = sum;
200 }
201
202 // Initialise search for largest pivot element
203 double largest_entry = 0.0;
204 for (unsigned long i = j; i < n; i++)
205 {
206 double sum = LU_factors[n * i + j];
207 for (unsigned long k = 0; k < j; k++)
208 {
209 sum -= LU_factors[n * i + k] * LU_factors[n * k + j];
210 }
211 LU_factors[n * i + j] = sum;
212 // Set temporary
213 double tmp = scaling[i] * std::fabs(sum);
214 if (tmp >= largest_entry)
215 {
216 largest_entry = tmp;
217 imax = i;
218 }
219 }
220
221 // Test to see if we need to interchange rows
222 if (j != imax)
223 {
224 for (unsigned long k = 0; k < n; k++)
225 {
226 double tmp = LU_factors[n * imax + k];
227 LU_factors[n * imax + k] = LU_factors[n * j + k];
228 LU_factors[n * j + k] = tmp;
229 }
230 // Change the parity of signature
231 signature = -signature;
232
233 // Interchange scale factor
234 scaling[imax] = scaling[j];
235 }
236
237 // Set the index
238 Index[j] = imax;
239 if (LU_factors[n * j + j] == 0.0)
240 {
241 LU_factors[n * j + j] = small_number;
242 }
243 // Divide by pivot element
244 if (j != n - 1)
245 {
246 double tmp = 1.0 / LU_factors[n * j + j];
247 for (unsigned long i = j + 1; i < n; i++)
248 {
249 LU_factors[n * i + j] *= tmp;
250 }
251 }
252
253 } // End of loop over columns
254
255
256 // Now multiply all the diagonal terms together to get the determinant
257 // Note that we need to use the mantissa, exponent formulation to
258 // avoid underflow errors
259 double determinant_mantissa = 1.0;
260 int determinant_exponent = 0, iexp;
261 for (unsigned i = 0; i < n; i++)
262 {
263 // Multiply by the next diagonal entry's mantissa
264 // and return the exponent
265 determinant_mantissa *= frexp(LU_factors[n * i + i], &iexp);
266
267 // Add the new exponent to the current exponent
268 determinant_exponent += iexp;
269
270 // normalise
271 determinant_mantissa = frexp(determinant_mantissa, &iexp);
272 determinant_exponent += iexp;
273 }
274
275 // If paranoid issue a warning that the matrix is near singular
276 // #ifdef PARANOID
277 // int tiny_exponent = -60;
278 // if(determinant_exponent < tiny_exponent)
279 // {
280 // std::ostringstream warning_stream;
281 // warning_stream << "The determinant of the matrix is very close to
282 // zero.\n"
283 // << "It is " << determinant_mantissa << " x 2^"
284 // << determinant_exponent << "\n";
285 // warning_stream << "The results will depend on the exact details of
286 // the\n"
287 // << "floating point implementation ... just to let you
288 // know\n";
289 // OomphLibWarning(warning_stream.str(),
290 // "DenseLU::factorise()",
291 // OOMPH_EXCEPTION_LOCATION);
292 // }
293 // #endif
294
295 // Integer to store the sign of the determinant
296 int sign = 0;
297
298 // Find the sign of the determinant
299 if (determinant_mantissa > 0.0)
300 {
301 sign = 1;
302 }
303 if (determinant_mantissa < 0.0)
304 {
305 sign = -1;
306 }
307
308 // Multiply the sign by the signature
309 sign *= signature;
310
311 // Return the sign of the determinant
313 }
314
315 //=============================================================================
316 /// Do the backsubstitution for the DenseLU solver.
317 /// WARNING: this class does not perform any PARANOID checks on the vectors -
318 /// these are all performed in the solve(...) method.
319 //=============================================================================
321 {
322 // Get pointers to first entries
323 const double* rhs_pt = rhs.values_pt();
324 double* result_pt = result.values_pt();
325
326 // Copy the rhs vector into the result vector
327 const unsigned long n = rhs.nrow();
328 for (unsigned long i = 0; i < n; ++i)
329 {
330 result_pt[i] = rhs_pt[i];
331 }
332
333 // Loop over all rows for forward substition
334 unsigned long k = 0;
335 for (unsigned long i = 0; i < n; i++)
336 {
337 unsigned long ip = Index[i];
338 double sum = result_pt[ip];
339 result_pt[ip] = result_pt[i];
340 if (k != 0)
341 {
342 for (unsigned long j = k - 1; j < i; j++)
343 {
344 sum -= LU_factors[n * i + j] * result_pt[j];
345 }
346 }
347 else if (sum != 0.0)
348 {
349 k = i + 1;
350 }
351 result_pt[i] = sum;
352 }
353
354 // Now do the back substitution
355 for (long i = long(n) - 1; i >= 0; i--)
356 {
357 double sum = result_pt[i];
358 for (long j = i + 1; j < long(n); j++)
359 {
360 sum -= LU_factors[n * i + j] * result_pt[j];
361 }
362 result_pt[i] = sum / LU_factors[n * i + i];
363 }
364 }
365
366 //=============================================================================
367 /// Do the backsubstitution for the DenseLU solver.
368 /// WARNING: this class does not perform any PARANOID checks on the vectors -
369 /// these are all performed in the solve(...) method. So, if you call backsub
370 /// directly, you have been warned...
371 //=============================================================================
373 {
374 // Copy the rhs vector into the result vector
375 const unsigned long n = rhs.size();
376 for (unsigned long i = 0; i < n; ++i)
377 {
378 result[i] = rhs[i];
379 }
380
381 // Loop over all rows for forward substition
382 unsigned long k = 0;
383 for (unsigned long i = 0; i < n; i++)
384 {
385 unsigned long ip = Index[i];
386 double sum = result[ip];
387 result[ip] = result[i];
388 if (k != 0)
389 {
390 for (unsigned long j = k - 1; j < i; j++)
391 {
392 sum -= LU_factors[n * i + j] * result[j];
393 }
394 }
395 else if (sum != 0.0)
396 {
397 k = i + 1;
398 }
399 result[i] = sum;
400 }
401
402 // Now do the back substitution
403 for (long i = long(n) - 1; i >= 0; i--)
404 {
405 double sum = result[i];
406 for (long j = i + 1; j < long(n); j++)
407 {
408 sum -= LU_factors[n * i + j] * result[j];
409 }
410 result[i] = sum / LU_factors[n * i + i];
411 }
412 }
413
414
415 //=============================================================================
416 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
417 /// vector and returns the solution of the linear system.
418 //============================================================================
419 void DenseLU::solve(DoubleMatrixBase* const& matrix_pt,
420 const DoubleVector& rhs,
421 DoubleVector& result)
422 {
423#ifdef PARANOID
424 // check that the rhs vector is not distributed
425 if (rhs.distribution_pt()->distributed())
426 {
427 std::ostringstream error_message_stream;
428 error_message_stream
429 << "The vectors rhs and result must not be distributed";
430 throw OomphLibError(error_message_stream.str(),
431 OOMPH_CURRENT_FUNCTION,
432 OOMPH_EXCEPTION_LOCATION);
433 }
434
435 // check that the matrix is square
436 if (matrix_pt->nrow() != matrix_pt->ncol())
437 {
438 std::ostringstream error_message_stream;
439 error_message_stream << "The matrix at matrix_pt must be square.";
440 throw OomphLibError(error_message_stream.str(),
441 OOMPH_CURRENT_FUNCTION,
442 OOMPH_EXCEPTION_LOCATION);
443 }
444 // check that the matrix and the rhs vector have the same nrow()
445 if (matrix_pt->nrow() != rhs.nrow())
446 {
447 std::ostringstream error_message_stream;
448 error_message_stream
449 << "The matrix and the rhs vector must have the same number of rows.";
450 throw OomphLibError(error_message_stream.str(),
451 OOMPH_CURRENT_FUNCTION,
452 OOMPH_EXCEPTION_LOCATION);
453 }
454
455 // if the matrix is distributable then it too should have the same
456 // communicator as the rhs vector and should not be distributed
457 DistributableLinearAlgebraObject* dist_matrix_pt =
458 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
459 if (dist_matrix_pt != 0)
460 {
461 if (dist_matrix_pt->distribution_pt()->communicator_pt()->nproc() > 1 &&
462 dist_matrix_pt->distribution_pt()->distributed() == true)
463 {
464 throw OomphLibError(
465 "Matrix must not be distributed or only one processor",
466 OOMPH_CURRENT_FUNCTION,
467 OOMPH_EXCEPTION_LOCATION);
468 }
470 if (!(temp_comm == *dist_matrix_pt->distribution_pt()->communicator_pt()))
471 {
472 std::ostringstream error_message_stream;
473 error_message_stream
474 << "The matrix matrix_pt must have the same communicator as the "
475 "vectors"
476 << " rhs and result must have the same communicator";
477 throw OomphLibError(error_message_stream.str(),
478 OOMPH_CURRENT_FUNCTION,
479 OOMPH_EXCEPTION_LOCATION);
480 }
481 }
482 // if the result vector is setup then check it is not distributed and has
483 // the same communicator as the rhs vector
484 if (result.distribution_built())
485 {
486 if (!(*result.distribution_pt() == *rhs.distribution_pt()))
487 {
488 std::ostringstream error_message_stream;
489 error_message_stream
490 << "The result vector distribution has been setup; it must have the "
491 << "same distribution as the rhs vector.";
492 throw OomphLibError(error_message_stream.str(),
493 OOMPH_CURRENT_FUNCTION,
494 OOMPH_EXCEPTION_LOCATION);
495 }
496 }
497#endif
498
499 if (!result.distribution_built())
500 {
501 result.build(rhs.distribution_pt(), 0.0);
502 }
503
504 // set the distribution
506
507 // Time the solver
508 double t_start = TimingHelpers::timer();
509
510 // factorise
511 factorise(matrix_pt);
512
513 // backsubstitute
514 backsub(rhs, result);
515
516 // Doc time for solver
517 double t_end = TimingHelpers::timer();
518
519 Solution_time = t_end - t_start;
520 if (Doc_time)
521 {
522 oomph_info << std::endl
523 << "CPU for solve with DenseLU : "
526 << std::endl
527 << std::endl;
528 }
529
530 // If we are not resolving then delete storage
531 if (!Enable_resolve)
532 {
534 }
535 }
536
537 //=============================================================================
538 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
539 /// vector and returns the solution of the linear system.
540 //=============================================================================
541 void DenseLU::solve(DoubleMatrixBase* const& matrix_pt,
542 const Vector<double>& rhs,
543 Vector<double>& result)
544 {
545 // Time the solver
546 clock_t t_start = clock();
547
548 factorise(matrix_pt);
549 backsub(rhs, result);
550
551 // Doc time for solver
552 clock_t t_end = clock();
553
554 Solution_time = double(t_end - t_start) / CLOCKS_PER_SEC;
555 if (Doc_time)
556 {
557 oomph_info << "CPU for solve with DenseLU : "
560 << std::endl;
561 }
562
563 // If we are not resolving then delete storage
564 if (!Enable_resolve)
565 {
567 }
568 }
569
570 //==================================================================
571 /// Solver: Takes pointer to problem and returns the results Vector
572 /// which contains the solution of the linear system defined by
573 /// the problem's residual Vector. (Jacobian assembled by FD).
574 //==================================================================
575 void FD_LU::solve(Problem* const& problem_pt, DoubleVector& result)
576 {
577 // Initialise timer
578 clock_t t_start = clock();
579
580#ifdef PARANOID
581 // if the result vector is setup then check it is not distributed and has
582 // the same communicator as the rhs vector
583 if (result.built())
584 {
585 if (result.distributed())
586 {
587 std::ostringstream error_message_stream;
588 error_message_stream << "The result vector must not be distributed";
589 throw OomphLibError(error_message_stream.str(),
590 OOMPH_CURRENT_FUNCTION,
591 OOMPH_EXCEPTION_LOCATION);
592 }
593 }
594#endif
595
596 // Find # of degrees of freedom
597 unsigned long n_dof = problem_pt->ndof();
598
599 // Allocate storage for the residuals vector and the jacobian matrix
600 DoubleVector residuals;
601 DenseDoubleMatrix jacobian(n_dof);
602
603 {
604 // initialise timer
605 clock_t t_start = clock();
606
607 // Get the full jacobian by finite differencing) VERY INEFFICIENT!
608 problem_pt->get_fd_jacobian(residuals, jacobian);
609
610 // compute jacobian setup time
611 clock_t t_end = clock();
612 Jacobian_setup_time = double(t_end - t_start) / CLOCKS_PER_SEC;
613
614 // Report the time
615 if (Doc_time)
616 {
617 oomph_info << std::endl
618 << "CPU for setup of Dense Jacobian: "
621 << std::endl
622 << std::endl;
623 }
624 }
625
626 // Solve by dense LU decomposition (not efficient)
627 solve(&jacobian, residuals, result);
628
629 // Set the sign of the determinant of the jacobian
631
632 // Finalise/doc timings
633 clock_t t_end = clock();
634 double total_time = double(t_end - t_start) / CLOCKS_PER_SEC;
635 if (Doc_time)
636 {
637 oomph_info << "CPU for FD DenseLU LinearSolver: "
639 << std::endl
640 << std::endl;
641 }
642 }
643
644
645 //===================================================================
646 // Interface to SuperLU wrapper
647 //===================================================================
648 extern "C"
649 {
650 int superlu(int*,
651 int*,
652 int*,
653 int*,
654 double*,
655 int*,
656 int*,
657 double*,
658 int*,
659 int*,
660 int*,
661 void*,
662 int*);
663 }
664
665
666#ifdef OOMPH_HAS_MPI
667 //===================================================================
668 // Interface to SuperLU_DIST wrapper
669 //===================================================================
670 extern "C"
671 {
672 // Interface to distributed SuperLU solver where each processor
673 // holds the entire matrix
674 void superlu_dist_global_matrix(int opt_flag,
675 int allow_permutations,
676 int n,
677 int nnz,
678 double* values,
679 int* row_index,
680 int* col_start,
681 double* b,
682 int nprow,
683 int npcol,
684 int doc,
685 void** data,
686 int* info,
687 MPI_Comm comm);
688
689 // Interface to distributed SuperLU solver where each processor
690 // holds part of the matrix
692 int allow_permutations,
693 int n,
694 int nnz_local,
695 int nrow_local,
696 int first_row,
697 double* values,
698 int* col_index,
699 int* row_start,
700 double* b,
701 int nprow,
702 int npcol,
703 int doc,
704 void** data,
705 int* info,
706 MPI_Comm comm);
707
708 // helper method - just calls the superlu method dCompRow_to_CompCol to
709 // convert the c-style vectors of a cr matrix to a cc matrix
710 void superlu_cr_to_cc(int nrow,
711 int ncol,
712 int nnz,
713 double* cr_values,
714 int* cr_index,
715 int* cr_start,
716 double** cc_values,
717 int** cc_index,
718 int** cc_start);
719 }
720#endif
721
722
723 //===================================================================
724 // Interface to SuperLU wrapper extras
725 //===================================================================
726 extern "C"
727 {
728 /// Function to calculate the number of bytes used to store the
729 /// LU factors
731
732 /// Function to calculate the number of bytes used in calculating
733 /// and storing the LU factors
735 }
736
737#ifdef OOMPH_HAS_MPI
738 //===================================================================
739 // Interface to SuperLU_DIST wrapper extras
740 //===================================================================
741 extern "C"
742 {
743 /// Function to calculate the number of bytes used to store the
744 /// LU factors
746
747 /// Function to calculate the number of bytes used in calculating
748 /// and storing the LU factors
750 }
751#endif
752
753 //=============================================================================
754 /// How much memory do the LU factors take up? In bytes
755 /// NOTE: This has been scraped from dQuerySpace(...) in dmemory.c in
756 /// external_src/oomph_superlu_4.3
757 //=============================================================================
759 {
760 // If we're using the non-distributed version of SuperLU and the LU
761 // factors have also been computed
762 if ((Solver_type != Distributed) && (Serial_f_factors != 0))
763 {
765 }
766#ifdef OOMPH_HAS_MPI
767 // If we're using SuperLU dist and the LU factors have been computed
769 {
771 }
772#endif
773 // If the factors haven't been computed we can't do anything
774 else
775 {
776 return 0.0;
777 }
778 } // End of get_memory_usage_for_lu_factors
779
780
781 //=============================================================================
782 /// How much memory was used in total? In bytes
783 /// NOTE: This has been scraped from dQuerySpace(...) in dmemory.c in
784 /// external_src/oomph_superlu_4.3
785 //=============================================================================
787 {
788 // If we're using the non-distributed version of SuperLU and the LU
789 // factors have also been computed
790 if ((Solver_type != Distributed) && (Serial_f_factors != 0))
791 {
793 }
794#ifdef OOMPH_HAS_MPI
795 // If we're using SuperLU dist and the LU factors have been computed
797 {
799 }
800#endif
801 // If the factors haven't been computed we can't do anything
802 else
803 {
804 return 0.0;
805 }
806 } // End of get_total_needed_memory
807
808
809 //==========================================================================
810 /// Solver: Takes pointer to problem and returns the results Vector
811 /// which contains the solution of the linear system defined by
812 /// the problem's fully assembled Jacobian and residual Vector.
813 //==========================================================================
814 void SuperLUSolver::solve(Problem* const& problem_pt, DoubleVector& result)
815 {
816 // wipe memory
817 this->clean_up_memory();
818
819#ifdef OOMPH_HAS_MPI
820 // USING SUPERLU DIST
821 /// //////////////////
822 if (Solver_type == Distributed ||
823 (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
824 {
825 // init the timers
826 double t_start = TimingHelpers::timer();
827
828 // number of dofs
829 unsigned n_dof = problem_pt->ndof();
830
831 // set the distribution
833 problem_pt->communicator_pt(), n_dof, !Dist_use_global_solver);
834 this->build_distribution(dist);
835
836 // Take a copy of Delete_matrix_data
837 bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
838
839 // Set Delete_matrix to true
841
842 // Use the distributed version of SuperLU_DIST?
844 {
845 // Initialise timer
846 double t_start = TimingHelpers::timer();
847
848 // Storage for the residuals vector
849 DoubleVector residuals(this->distribution_pt(), 0.0);
850
851 // Get the sparse jacobian and residuals of the problem
852 CRDoubleMatrix jacobian(this->distribution_pt());
853 problem_pt->get_jacobian(residuals, jacobian);
854
855 // Doc time for setup
856 double t_end = TimingHelpers::timer();
857 Jacobian_setup_time = t_end - t_start;
858 if (Doc_time)
859 {
860 oomph_info << "Time to set up CRDoubleMatrix Jacobian : "
863 << std::endl;
864 }
865
866 // Now call the linear algebra solve, if desired
867 if (!Suppress_solve)
868 {
869 // If the distribution of the result has been build and
870 // does not match that of
871 // the solver then redistribute before the solve and return
872 // to the incoming distribution afterwards.
873 if ((result.built()) &&
874 (!(*result.distribution_pt() == *this->distribution_pt())))
875 {
876 LinearAlgebraDistribution temp_global_dist(
877 result.distribution_pt());
878 result.build(this->distribution_pt(), 0.0);
879 solve(&jacobian, residuals, result);
880 result.redistribute(&temp_global_dist);
881 }
882 else
883 {
884 solve(&jacobian, residuals, result);
885 }
886 }
887 }
888 // Otherwise its the global solve version
889 else
890 {
891 // Storage for the residuals vector
892 // A non-distriubted residuals vector
894 problem_pt->communicator_pt(), problem_pt->ndof(), false);
895 DoubleVector residuals(&dist, 0.0);
896 CRDoubleMatrix jacobian(&dist);
897
898 // Get the sparse jacobian and residuals of the problem
899 problem_pt->get_jacobian(residuals, jacobian);
900
901 // Doc time for setup
902 double t_end = TimingHelpers::timer();
903 Jacobian_setup_time = t_end - t_start;
904 if (Doc_time)
905 {
906 oomph_info << "Time to set up CR Jacobian : "
909 << std::endl;
910 }
911
912 // Now call the linear algebra solve, if desired
913 if (!Suppress_solve)
914 {
915 // If the result distribution has been built and
916 // does not match the global distribution
917 // the redistribute before the solve and then return to the
918 // distributed version afterwards
919 if ((result.built()) && (!(*result.distribution_pt() == dist)))
920 {
921 LinearAlgebraDistribution temp_global_dist(
922 result.distribution_pt());
923 result.build(&dist, 0.0);
924 solve(&jacobian, residuals, result);
925 result.redistribute(&temp_global_dist);
926 }
927 else
928 {
929 solve(&jacobian, residuals, result);
930 }
931 }
932 }
933 // Set Delete_matrix back to original value
934 Dist_delete_matrix_data = copy_of_Delete_matrix_data;
935 }
936
937 // OTHERWISE WE ARE USING SUPERLU (SERIAL)
938 /// ///////////////////////////////////////
939 else
940#endif
941 {
942 // set the solver distribution
944 problem_pt->communicator_pt(), problem_pt->ndof(), false);
945 this->build_distribution(dist);
946
947 // Allocate storage for the residuals vector
948 DoubleVector residuals(dist, 0.0);
949
950 // Use the compressed row version?
952 {
953 // Initialise timer
954 double t_start = TimingHelpers::timer();
955
956 // Get the sparse jacobian and residuals of the problem
957 CRDoubleMatrix CR_jacobian(this->distribution_pt());
958 problem_pt->get_jacobian(residuals, CR_jacobian);
959
960 // If we want to compute the gradient for the globally convergent
961 // Newton method, then do it here
963 {
964 // Compute it
965 CR_jacobian.multiply_transpose(residuals,
967 // Set the flag
969 }
970
971 // Doc time for setup
972 double t_end = TimingHelpers::timer();
973 Jacobian_setup_time = t_end - t_start;
974 if (Doc_time)
975 {
976 oomph_info << std::endl
977 << "Time to set up CRDoubleMatrix Jacobian : "
980 << std::endl;
981 }
982
983 // Now call the linear algebra solve, if desired
984 if (!Suppress_solve)
985 {
986 // If the result vector is built and distributed
987 // then need to redistribute into the same form as the
988 // RHS (non-distributed)
989 if ((result.built()) &&
990 (!(*result.distribution_pt() == *this->distribution_pt())))
991 {
992 LinearAlgebraDistribution temp_global_dist(
993 result.distribution_pt());
994 result.build(this->distribution_pt(), 0.0);
995 solve(&CR_jacobian, residuals, result);
996 result.redistribute(&temp_global_dist);
997 }
998 // Otherwise just solve
999 else
1000 {
1001 solve(&CR_jacobian, residuals, result);
1002 }
1003 }
1004 }
1005 // Otherwise its the compressed column version
1006 else
1007 {
1008 // Initialise timer
1009 double t_start = TimingHelpers::timer();
1010
1011 // Get the sparse jacobian and residuals of the problem
1012 CCDoubleMatrix CC_jacobian;
1013 problem_pt->get_jacobian(residuals, CC_jacobian);
1014
1015 // If we want to compute the gradient for the globally convergent
1016 // Newton method, then do it here
1017 if (Compute_gradient)
1018 {
1019 // Compute it
1020 CC_jacobian.multiply_transpose(residuals,
1022 // Set the flag
1024 }
1025
1026 // Doc time for setup
1027 double t_end = TimingHelpers::timer();
1028 Jacobian_setup_time = t_end - t_start;
1029 if (Doc_time)
1030 {
1031 oomph_info << "\nTime to set up CCDoubleMatrix Jacobian: "
1034 << std::endl;
1035 }
1036
1037 // Now call the linear algebra solve, if desired
1038 if (!Suppress_solve)
1039 {
1040 // If the result vector is built and distributed
1041 // then need to redistribute into the same form as the
1042 // RHS
1043 if ((result.built()) &&
1044 (!(*result.distribution_pt() == *this->distribution_pt())))
1045 {
1046 LinearAlgebraDistribution temp_global_dist(
1047 result.distribution_pt());
1048 result.build(this->distribution_pt(), 0.0);
1049 solve(&CC_jacobian, residuals, result);
1050 result.redistribute(&temp_global_dist);
1051 }
1052 // Otherwise just solve
1053 else
1054 {
1055 solve(&CC_jacobian, residuals, result);
1056 }
1057 }
1058 }
1059
1060 // Set the sign of the jacobian
1061 //(this is computed in the LU decomposition phase)
1063 }
1064 }
1065
1066 //=========================================================================
1067 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
1068 /// vector and returns the solution of the linear system. Problem pointer
1069 /// defaults to NULL and can be omitted. The function returns the global
1070 /// result Vector.
1071 /// Note: if Delete_matrix_data is true the function
1072 /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
1073 //=========================================================================
1075 const DoubleVector& rhs,
1076 DoubleVector& result)
1077 {
1078 // Initialise timer
1079 double t_start = TimingHelpers::timer();
1080
1081 // Pointer used in various places
1082 CRDoubleMatrix* cr_pt = 0;
1083
1084
1085#ifdef PARANOID
1086 // check that the rhs vector is setup
1087 if (!rhs.built())
1088 {
1089 std::ostringstream error_message_stream;
1090 error_message_stream << "The vectors rhs must be setup";
1091 throw OomphLibError(error_message_stream.str(),
1092 OOMPH_CURRENT_FUNCTION,
1093 OOMPH_EXCEPTION_LOCATION);
1094 }
1095
1096 // check that the matrix is square
1097 if (matrix_pt->nrow() != matrix_pt->ncol())
1098 {
1099 std::ostringstream error_message_stream;
1100 error_message_stream << "The matrix at matrix_pt must be square.";
1101 throw OomphLibError(error_message_stream.str(),
1102 OOMPH_CURRENT_FUNCTION,
1103 OOMPH_EXCEPTION_LOCATION);
1104 }
1105
1106 // check that the matrix has some entries, and so has a values_pt that
1107 // makes sense (only for CR because CC is never used I think dense
1108 // matrices will be safe since they don't use a values pointer).
1109 cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1110 if (cr_pt != 0)
1111 {
1112 if (cr_pt->nnz() == 0)
1113 {
1114 std::ostringstream error_message_stream;
1115 error_message_stream
1116 << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
1117 << "SuperLU would segfault (because the values array pt is "
1118 << "uninitialised or null).";
1119 throw OomphLibError(error_message_stream.str(),
1120 OOMPH_CURRENT_FUNCTION,
1121 OOMPH_EXCEPTION_LOCATION);
1122 }
1123 }
1124
1125 // check that the matrix and the rhs vector have the same nrow()
1126 if (matrix_pt->nrow() != rhs.nrow())
1127 {
1128 std::ostringstream error_message_stream;
1129 error_message_stream
1130 << "The matrix and the rhs vector must have the same number of rows.";
1131 throw OomphLibError(error_message_stream.str(),
1132 OOMPH_CURRENT_FUNCTION,
1133 OOMPH_EXCEPTION_LOCATION);
1134 }
1135
1136 // if the matrix is distributable then should have the same distribution
1137 // as the rhs vector
1138 DistributableLinearAlgebraObject* dist_matrix_pt =
1139 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
1140 if (dist_matrix_pt != 0)
1141 {
1142 if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
1143 {
1144 std::ostringstream error_message_stream;
1145 error_message_stream
1146 << "The matrix matrix_pt must have the same distribution as the "
1147 << "rhs vector.";
1148 throw OomphLibError(error_message_stream.str(),
1149 OOMPH_CURRENT_FUNCTION,
1150 OOMPH_EXCEPTION_LOCATION);
1151 }
1152 }
1153 // if the matrix is not distributable then it the rhs vector should not be
1154 // distributed
1155 else
1156 {
1157 if (rhs.distribution_pt()->distributed())
1158 {
1159 std::ostringstream error_message_stream;
1160 error_message_stream
1161 << "The matrix (matrix_pt) is not distributable and therefore the rhs"
1162 << " vector must not be distributed";
1163 throw OomphLibError(error_message_stream.str(),
1164 OOMPH_CURRENT_FUNCTION,
1165 OOMPH_EXCEPTION_LOCATION);
1166 }
1167 }
1168 // if the result vector is setup then check it has the same distribution
1169 // as the rhs
1170 if (result.built())
1171 {
1172 if (!(*result.distribution_pt() == *rhs.distribution_pt()))
1173 {
1174 std::ostringstream error_message_stream;
1175 error_message_stream
1176 << "The result vector distribution has been setup; it must have the "
1177 << "same distribution as the rhs vector.";
1178 throw OomphLibError(error_message_stream.str(),
1179 OOMPH_CURRENT_FUNCTION,
1180 OOMPH_EXCEPTION_LOCATION);
1181 }
1182 }
1183#endif
1184
1185 // set the distribution
1186 if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
1187 {
1188 // the solver has the same distribution as the matrix if possible
1189 this->build_distribution(
1190 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
1191 ->distribution_pt());
1192 }
1193 else
1194 {
1195 // the solver has the same distribution as the RHS
1197 }
1198
1199 // Doc time for solve
1200 double t_factorise_start = TimingHelpers::timer();
1201
1202 // Factorise the matrix
1203 factorise(matrix_pt);
1204
1205 // Doc the end time
1206 double t_factorise_end = TimingHelpers::timer();
1207
1208 // How long did the factorisation take?
1209 double factorise_time = t_factorise_end - t_factorise_start;
1210
1211 // Try and upcast the matrix to a CRDoubleMatrix
1212 // CRDoubleMatrix*
1213 cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1214
1215 // If the input matrix is a CRDoubleMatrix
1216 if (cr_pt != 0)
1217 {
1218 // ...and actually has an entry
1219 if (cr_pt->nnz() != 0)
1220 {
1221 // Find out how many rows there are in the global Jacobian
1222 unsigned n_row = cr_pt->nrow();
1223
1224 // And how many non-zeros there are in the global Jacobian
1225 unsigned n_nnz = cr_pt->nnz();
1226
1227 // Get the memory usage (in bytes) for the global Jacobian storage
1228 double memory_usage_for_jacobian =
1229 ((2 * ((n_row + 1) * sizeof(int))) +
1230 (n_nnz * (sizeof(int) + sizeof(double))));
1231
1232 // Get the memory usage (in bytes) for storage of the LU factors in
1233 // SuperLU
1234 double memory_usage_for_lu_storage = get_total_needed_memory();
1235
1236 // Get the memory usage (in bytes) for storage of the LU factors in
1237 // SuperLU
1238 double total_memory_usage =
1239 memory_usage_for_jacobian + memory_usage_for_lu_storage;
1240
1241 // How much memory have we used in the subsidiary preconditioners?
1242 oomph_info << "\nMemory statistics:"
1243 << "\n - Memory used to store the Jacobian (MB) : "
1244 << memory_usage_for_jacobian / 1.0e+06
1245 << "\n - Memory used to store the LU factors (MB) : "
1246 << memory_usage_for_lu_storage / 1.0e+06
1247 << "\n - Total memory used for matrix storage (MB): "
1248 << total_memory_usage / 1.0e+06 << "\n"
1249 << std::endl;
1250 }
1251 } // if (cr_pt!=0)
1252
1253 // Doc the start time
1254 double t_backsub_start = TimingHelpers::timer();
1255
1256 // Now do the back solve
1257 backsub(rhs, result);
1258
1259 // Doc the end time
1260 double t_backsub_end = TimingHelpers::timer();
1261
1262 // How long did the back substitution take?
1263 double backsub_time = t_backsub_end - t_backsub_start;
1264
1265 // Doc time for solve
1266 double t_end = TimingHelpers::timer();
1267 Solution_time = t_end - t_start;
1268 if (Doc_time)
1269 {
1271 << "Time for LU factorisation : "
1273 << "\nTime for back-substitution: "
1275 << "\nTime for SuperLUSolver solve (ndof=" << matrix_pt->nrow() << "): "
1277 << std::endl;
1278 }
1279
1280 // If we are not storing the solver data for resolves, delete it
1281 if (!Enable_resolve)
1282 {
1284 }
1285 }
1286
1287
1288 //=============================================================================
1289 /// Solver: Takes pointer to problem and returns the results Vector
1290 /// which contains the solution of the linear system defined by
1291 /// the problem's fully assembled Jacobian and residual Vector.
1292 //=============================================================================
1294 DoubleVector& result)
1295 {
1296 // wipe memory
1297 this->clean_up_memory();
1298
1299#ifdef OOMPH_HAS_MPI
1300 // USING SUPERLU DIST
1301 /// //////////////////
1302 if (Solver_type == Distributed ||
1303 (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
1304 {
1305 // init the timers
1306 double t_start = TimingHelpers::timer();
1307
1308 // number of dofs
1309 unsigned n_dof = problem_pt->ndof();
1310
1311 // set the distribution
1313 problem_pt->communicator_pt(), n_dof, !Dist_use_global_solver);
1314 this->build_distribution(dist);
1315
1316 // Take a copy of Delete_matrix_data
1317 bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
1318
1319 // Set Delete_matrix to true
1321
1322 // Use the distributed version of SuperLU_DIST?
1324 {
1325 // Initialise timer
1326 double t_start = TimingHelpers::timer();
1327
1328 // Storage for the residuals vector
1329 DoubleVector residuals(this->distribution_pt(), 0.0);
1330
1331 // Get the sparse jacobian and residuals of the problem
1332 CRDoubleMatrix jacobian(this->distribution_pt());
1333 problem_pt->get_jacobian(residuals, jacobian);
1334
1335 // Doc time for setup
1336 double t_end = TimingHelpers::timer();
1337 Jacobian_setup_time = t_end - t_start;
1338 if (Doc_time)
1339 {
1340 oomph_info << "Time to set up CRDoubleMatrix Jacobian : "
1343 << std::endl;
1344 }
1345
1346 // Now call the linear algebra solve, if desired
1347 if (!Suppress_solve)
1348 {
1349 // If the distribution of the result has been build and
1350 // does not match that of
1351 // the solver then redistribute before the solve and return
1352 // to the incoming distribution afterwards.
1353 if ((result.built()) &&
1354 (!(*result.distribution_pt() == *this->distribution_pt())))
1355 {
1356 LinearAlgebraDistribution temp_global_dist(
1357 result.distribution_pt());
1358 result.build(this->distribution_pt(), 0.0);
1359 solve_transpose(&jacobian, residuals, result);
1360 result.redistribute(&temp_global_dist);
1361 }
1362 else
1363 {
1364 solve_transpose(&jacobian, residuals, result);
1365 }
1366 }
1367 }
1368 // Otherwise its the global solve version
1369 else
1370 {
1371 // Storage for the residuals vector
1372 // A non-distriubted residuals vector
1374 problem_pt->communicator_pt(), problem_pt->ndof(), false);
1375 DoubleVector residuals(&dist, 0.0);
1376 CRDoubleMatrix jacobian(&dist);
1377
1378 // Get the sparse jacobian and residuals of the problem
1379 problem_pt->get_jacobian(residuals, jacobian);
1380
1381 // Doc time for setup
1382 double t_end = TimingHelpers::timer();
1383 Jacobian_setup_time = t_end - t_start;
1384 if (Doc_time)
1385 {
1386 oomph_info << "Time to set up CR Jacobian : "
1389 << std::endl;
1390 }
1391
1392 // Now call the linear algebra solve, if desired
1393 if (!Suppress_solve)
1394 {
1395 // If the result distribution has been built and
1396 // does not match the global distribution
1397 // the redistribute before the solve and then return to the
1398 // distributed version afterwards
1399 if ((result.built()) && (!(*result.distribution_pt() == dist)))
1400 {
1401 LinearAlgebraDistribution temp_global_dist(
1402 result.distribution_pt());
1403 result.build(&dist, 0.0);
1404 solve_transpose(&jacobian, residuals, result);
1405 result.redistribute(&temp_global_dist);
1406 }
1407 else
1408 {
1409 solve_transpose(&jacobian, residuals, result);
1410 }
1411 }
1412 }
1413 // Set Delete_matrix back to original value
1414 Dist_delete_matrix_data = copy_of_Delete_matrix_data;
1415 }
1416
1417 // OTHERWISE WE ARE USING SUPERLU (SERIAL)
1418 /// ///////////////////////////////////////
1419 else
1420#endif
1421 {
1422 // set the solver distribution
1424 problem_pt->communicator_pt(), problem_pt->ndof(), false);
1425 this->build_distribution(dist);
1426
1427 // Allocate storage for the residuals vector
1428 DoubleVector residuals(dist, 0.0);
1429
1430 // Use the compressed row version?
1432 {
1433 // Initialise timer
1434 double t_start = TimingHelpers::timer();
1435
1436 // Get the sparse jacobian and residuals of the problem
1437 CRDoubleMatrix CR_jacobian(this->distribution_pt());
1438 problem_pt->get_jacobian(residuals, CR_jacobian);
1439
1440 // If we want to compute the gradient for the globally convergent
1441 // Newton method, then do it here
1442 if (Compute_gradient)
1443 {
1444 // Compute it
1445 CR_jacobian.multiply_transpose(residuals,
1447 // Set the flag
1449 }
1450
1451 // Doc time for setup
1452 double t_end = TimingHelpers::timer();
1453 Jacobian_setup_time = t_end - t_start;
1454 if (Doc_time)
1455 {
1456 oomph_info << std::endl
1457 << "Time to set up CRDoubleMatrix Jacobian: "
1460 << std::endl;
1461 }
1462
1463 // Now call the linear algebra solve, if desired
1464 if (!Suppress_solve)
1465 {
1466 // If the result vector is built and distributed
1467 // then need to redistribute into the same form as the
1468 // RHS (non-distributed)
1469 if ((result.built()) &&
1470 (!(*result.distribution_pt() == *this->distribution_pt())))
1471 {
1472 LinearAlgebraDistribution temp_global_dist(
1473 result.distribution_pt());
1474 result.build(this->distribution_pt(), 0.0);
1475 solve_transpose(&CR_jacobian, residuals, result);
1476 result.redistribute(&temp_global_dist);
1477 }
1478 // Otherwise just solve
1479 else
1480 {
1481 solve_transpose(&CR_jacobian, residuals, result);
1482 }
1483 }
1484 }
1485 // Otherwise its the compressed column version
1486 else
1487 {
1488 // Initialise timer
1489 double t_start = TimingHelpers::timer();
1490
1491 // Get the sparse jacobian and residuals of the problem
1492 CCDoubleMatrix CC_jacobian;
1493 problem_pt->get_jacobian(residuals, CC_jacobian);
1494
1495 // If we want to compute the gradient for the globally convergent
1496 // Newton method, then do it here
1497 if (Compute_gradient)
1498 {
1499 // Compute it
1500 CC_jacobian.multiply_transpose(residuals,
1502 // Set the flag
1504 }
1505
1506 // Doc time for setup
1507 double t_end = TimingHelpers::timer();
1508 Jacobian_setup_time = t_end - t_start;
1509 if (Doc_time)
1510 {
1511 oomph_info << "\nTime to set up CCDoubleMatrix Jacobian: "
1514 << std::endl;
1515 }
1516
1517 // Now call the linear algebra solve, if desired
1518 if (!Suppress_solve)
1519 {
1520 // If the result vector is built and distributed
1521 // then need to redistribute into the same form as the
1522 // RHS
1523 if ((result.built()) &&
1524 (!(*result.distribution_pt() == *this->distribution_pt())))
1525 {
1526 LinearAlgebraDistribution temp_global_dist(
1527 result.distribution_pt());
1528 result.build(this->distribution_pt(), 0.0);
1529 solve_transpose(&CC_jacobian, residuals, result);
1530 result.redistribute(&temp_global_dist);
1531 }
1532 // Otherwise just solve
1533 else
1534 {
1535 solve_transpose(&CC_jacobian, residuals, result);
1536 }
1537 }
1538 }
1539
1540 // Set the sign of the jacobian
1541 //(this is computed in the LU decomposition phase)
1543 }
1544 }
1545
1546 //=========================================================================
1547 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
1548 /// vector and returns the solution of the linear system. Problem pointer
1549 /// defaults to NULL and can be omitted. The function returns the global
1550 /// result Vector.
1551 /// Note: if Delete_matrix_data is true the function
1552 /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
1553 //=========================================================================
1555 const DoubleVector& rhs,
1556 DoubleVector& result)
1557 {
1558 // Initialise timer
1559 double t_start = TimingHelpers::timer();
1560
1561 // Pointer used in various places
1562 CRDoubleMatrix* cr_pt = 0;
1563
1564#ifdef PARANOID
1565 // check that the rhs vector is setup
1566 if (!rhs.built())
1567 {
1568 std::ostringstream error_message_stream;
1569 error_message_stream << "The vectors rhs must be setup";
1570 throw OomphLibError(error_message_stream.str(),
1571 OOMPH_CURRENT_FUNCTION,
1572 OOMPH_EXCEPTION_LOCATION);
1573 }
1574
1575 // check that the matrix is square
1576 if (matrix_pt->nrow() != matrix_pt->ncol())
1577 {
1578 std::ostringstream error_message_stream;
1579 error_message_stream << "The matrix at matrix_pt must be square.";
1580 throw OomphLibError(error_message_stream.str(),
1581 OOMPH_CURRENT_FUNCTION,
1582 OOMPH_EXCEPTION_LOCATION);
1583 }
1584
1585 // check that the matrix has some entries, and so has a values_pt that
1586 // makes sense (only for CR because CC is never used I think dense
1587 // matrices will be safe since they don't use a values pointer).
1588 cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1589 if (cr_pt != 0)
1590 {
1591 if (cr_pt->nnz() == 0)
1592 {
1593 std::ostringstream error_message_stream;
1594 error_message_stream
1595 << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
1596 << "SuperLU would segfault (because the values array pt is "
1597 << "uninitialised or null).";
1598 throw OomphLibError(error_message_stream.str(),
1599 OOMPH_CURRENT_FUNCTION,
1600 OOMPH_EXCEPTION_LOCATION);
1601 }
1602 }
1603
1604 // check that the matrix and the rhs vector have the same nrow()
1605 if (matrix_pt->nrow() != rhs.nrow())
1606 {
1607 std::ostringstream error_message_stream;
1608 error_message_stream
1609 << "The matrix and the rhs vector must have the same number of rows.";
1610 throw OomphLibError(error_message_stream.str(),
1611 OOMPH_CURRENT_FUNCTION,
1612 OOMPH_EXCEPTION_LOCATION);
1613 }
1614
1615 // if the matrix is distributable then should have the same distribution
1616 // as the rhs vector
1617 DistributableLinearAlgebraObject* dist_matrix_pt =
1618 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
1619 if (dist_matrix_pt != 0)
1620 {
1621 if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
1622 {
1623 std::ostringstream error_message_stream;
1624 error_message_stream
1625 << "The matrix matrix_pt must have the same distribution as the "
1626 << "rhs vector.";
1627 throw OomphLibError(error_message_stream.str(),
1628 OOMPH_CURRENT_FUNCTION,
1629 OOMPH_EXCEPTION_LOCATION);
1630 }
1631 }
1632 // if the matrix is not distributable then it the rhs vector should not be
1633 // distributed
1634 else
1635 {
1636 if (rhs.distribution_pt()->distributed())
1637 {
1638 std::ostringstream error_message_stream;
1639 error_message_stream
1640 << "The matrix (matrix_pt) is not distributable and therefore the rhs"
1641 << " vector must not be distributed";
1642 throw OomphLibError(error_message_stream.str(),
1643 OOMPH_CURRENT_FUNCTION,
1644 OOMPH_EXCEPTION_LOCATION);
1645 }
1646 }
1647 // if the result vector is setup then check it has the same distribution
1648 // as the rhs
1649 if (result.built())
1650 {
1651 if (!(*result.distribution_pt() == *rhs.distribution_pt()))
1652 {
1653 std::ostringstream error_message_stream;
1654 error_message_stream
1655 << "The result vector distribution has been setup; it must have the "
1656 << "same distribution as the rhs vector.";
1657 throw OomphLibError(error_message_stream.str(),
1658 OOMPH_CURRENT_FUNCTION,
1659 OOMPH_EXCEPTION_LOCATION);
1660 }
1661 }
1662#endif
1663
1664 // set the distribution
1665 if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
1666 {
1667 // the solver has the same distribution as the matrix if possible
1668 this->build_distribution(
1669 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
1670 ->distribution_pt());
1671 }
1672 else
1673 {
1674 // the solver has the same distribution as the RHS
1676 }
1677
1678 // Doc time for solve
1679 double t_factorise_start = TimingHelpers::timer();
1680
1681 // Factorise the matrix
1682 factorise(matrix_pt);
1683
1684 // Doc the end time
1685 double t_factorise_end = TimingHelpers::timer();
1686
1687 // How long did the factorisation take?
1688 double factorise_time = t_factorise_end - t_factorise_start;
1689
1690 // Try and upcast the matrix to a CRDoubleMatrix
1691 // CRDoubleMatrix*
1692 cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1693
1694 // If the input matrix is a CRDoubleMatrix
1695 if (cr_pt != 0)
1696 {
1697 // ...and actually has an entry
1698 if (cr_pt->nnz() != 0)
1699 {
1700 // Find out how many rows there are in the global Jacobian
1701 unsigned n_row = cr_pt->nrow();
1702
1703 // And how many non-zeros there are in the global Jacobian
1704 unsigned n_nnz = cr_pt->nnz();
1705
1706 // Get the memory usage (in bytes) for the global Jacobian storage
1707 double memory_usage_for_jacobian =
1708 ((2 * ((n_row + 1) * sizeof(int))) +
1709 (n_nnz * (sizeof(int) + sizeof(double))));
1710
1711 // Get the memory usage (in bytes) for storage of the LU factors in
1712 // SuperLU
1713 double memory_usage_for_lu_storage = get_total_needed_memory();
1714
1715 // Get the memory usage (in bytes) for storage of the LU factors in
1716 // SuperLU
1717 double total_memory_usage =
1718 memory_usage_for_jacobian + memory_usage_for_lu_storage;
1719
1720 // How much memory have we used in the subsidiary preconditioners?
1721 oomph_info << "\nMemory statistics:"
1722 << "\n - Memory used to store the Jacobian (MB): "
1723 << memory_usage_for_jacobian / 1.0e+06
1724 << "\n - Memory used to store the LU factors (MB): "
1725 << memory_usage_for_lu_storage / 1.0e+06
1726 << "\n - Total memory used for matrix storage (MB): "
1727 << total_memory_usage / 1.0e+06 << "\n"
1728 << std::endl;
1729 }
1730 } // if (cr_pt!=0)
1731
1732 // Doc the start time
1733 double t_backsub_start = TimingHelpers::timer();
1734
1735 // Now do the back solve
1736 backsub_transpose(rhs, result);
1737
1738 // Doc the end time
1739 double t_backsub_end = TimingHelpers::timer();
1740
1741 // How long did the back substitution take?
1742 double backsub_time = t_backsub_end - t_backsub_start;
1743
1744 // Doc time for solve
1745 double t_end = TimingHelpers::timer();
1746 Solution_time = t_end - t_start;
1747 if (Doc_time)
1748 {
1750 << "Time for LU factorisation : "
1752 << "\nTime for back-substitution: "
1754 << "\nTime for SuperLUSolver solve (ndof=" << matrix_pt->nrow() << "): "
1756 << std::endl;
1757 }
1758
1759 // If we are not storing the solver data for resolves, delete it
1760 if (!Enable_resolve)
1761 {
1763 }
1764 } // End of solve_transpose
1765
1766 //===============================================================
1767 /// Resolve the system for a given RHS
1768 //===============================================================
1770 {
1771 // Store starting time for solve
1772 double t_start = TimingHelpers::timer();
1773
1774 // backsub
1775 backsub(rhs, result);
1776
1777 // Doc time for solve
1778 double t_end = TimingHelpers::timer();
1779 Solution_time = t_end - t_start;
1780 if (Doc_time)
1781 {
1782 oomph_info << "Time for SuperLUSolver solve (ndof=" << rhs.nrow() << "): "
1784 t_start)
1785 << std::endl;
1786 }
1787 }
1788
1789
1790 //===============================================================
1791 /// Resolve the (transposed) system for a given RHS
1792 //===============================================================
1794 DoubleVector& result)
1795 {
1796 // Store starting time for solve
1797 double t_start = TimingHelpers::timer();
1798
1799 // Backsub (but solve the transposed system)
1800 backsub_transpose(rhs, result);
1801
1802 // Doc time for solve
1803 double t_end = TimingHelpers::timer();
1804 Solution_time = t_end - t_start;
1805 if (Doc_time)
1806 {
1807 oomph_info << "Time for SuperLUSolver solve (ndof=" << rhs.nrow() << "): "
1809 t_start)
1810 << std::endl;
1811 }
1812 }
1813
1814
1815 //===================================================================
1816 /// LU decompose the matrix addressed by matrix_pt by using
1817 /// the SuperLU solver. The resulting matrix factors are stored
1818 /// internally.
1819 //===================================================================
1821 {
1822 // wipe memory
1823 this->clean_up_memory();
1824
1825 // if we have mpi and the solver is distributed or default and nproc
1826 // gt 1
1827#ifdef OOMPH_HAS_MPI
1828 DistributableLinearAlgebraObject* dist_matrix_pt =
1829 dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
1830 unsigned nproc = 1;
1831 if (dist_matrix_pt != 0)
1832 {
1833 nproc = dist_matrix_pt->distribution_pt()->communicator_pt()->nproc();
1834 }
1835 if (Solver_type == Distributed || (Solver_type == Default && nproc > 1 &&
1837 {
1838 // if the matrix is a distributed linear algebra object then use SuperLU
1839 // dist
1840 if (dist_matrix_pt != 0)
1841 {
1842 factorise_distributed(matrix_pt);
1843 Using_dist = true;
1844 }
1845 else
1846 {
1847 factorise_serial(matrix_pt);
1848 Using_dist = false;
1849 }
1850 }
1851 else
1852#endif
1853 {
1854 factorise_serial(matrix_pt);
1855 Using_dist = false;
1856 }
1857 }
1858
1859#ifdef OOMPH_HAS_MPI
1860 //=============================================================================
1861 /// LU decompose the matrix addressed by matrix_pt using
1862 /// the SuperLU_DIST solver. The resulting matrix factors are stored
1863 /// internally.
1864 //=============================================================================
1866 {
1867 // Check that we have a square matrix
1868#ifdef PARANOID
1869 int m = matrix_pt->ncol();
1870 int n = matrix_pt->nrow();
1871 if (n != m)
1872 {
1873 std::ostringstream error_message_stream;
1874 error_message_stream << "Can only solve for square matrices\n"
1875 << "N, M " << n << " " << m << std::endl;
1876
1877 throw OomphLibError(error_message_stream.str(),
1878 OOMPH_CURRENT_FUNCTION,
1879 OOMPH_EXCEPTION_LOCATION);
1880 }
1881#endif
1882
1883 // number of processors
1884 unsigned nproc = MPI_Helpers::communicator_pt()->nproc();
1885 if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
1886 {
1887 nproc = dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
1888 ->distribution_pt()
1889 ->communicator_pt()
1890 ->nproc();
1891 }
1892
1893 // Find number of rows and columns for the process grid
1894 // First guess at number of rows:
1895 int nprow = int(sqrt(double(nproc)));
1896
1897 // Does this evenly divide the processor grid?
1898 while (nprow > 1)
1899 {
1900 if (nproc % nprow == 0) break;
1901 nprow -= 1;
1902 }
1903
1904 // Store Number of rows/columns for process grid
1905 Dist_nprow = nprow;
1906 Dist_npcol = nproc / Dist_nprow;
1907
1908 // Make sure any existing factors are deleted
1910
1911 // Doc (0/1) = (true/false)
1912 int doc = !Doc_stats;
1913
1914 // Rset Info
1915 Dist_info = 0;
1916
1917 // Flag for row and column permutations
1918 int allow_permutations = Dist_allow_row_and_col_permutations;
1919
1920 // Is it a DistributedCRDoubleMatrix?
1921 if (dynamic_cast<CRDoubleMatrix*>(matrix_pt) != 0)
1922 {
1923 // Get a cast pointer to the matrix
1924 CRDoubleMatrix* cr_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1925
1926 // Get the distribution from the matrix
1927 this->build_distribution(cr_matrix_pt->distribution_pt());
1928
1929#ifdef PARANOID
1930 // paranoid check that the matrix has been setup
1931 if (!cr_matrix_pt->built())
1932 {
1933 throw OomphLibError(
1934 "To apply SuperLUSolver to a CRDoubleMatrix - it must be built",
1935 OOMPH_CURRENT_FUNCTION,
1936 OOMPH_EXCEPTION_LOCATION);
1937 }
1938#endif
1939
1940 // if the matrix is distributed then setup setup superlu dist distributed
1941 if (cr_matrix_pt->distributed())
1942 {
1943 // Find the number of non-zero entries in the matrix
1944 const int nnz_local = int(cr_matrix_pt->nnz());
1945
1946 // Set up the pointers to the matrix.
1947 // NOTE: these arrays (accessed via value_pt, index_pt and
1948 // start_pt) may be modified by the SuperLU_DIST routines, and so
1949 // a copy must be taken if the matrix is to be preserved.
1950
1951 // Copy values
1952 Dist_value_pt = new double[nnz_local];
1953 double* matrix_value_pt = cr_matrix_pt->value();
1954 for (int i = 0; i < nnz_local; i++)
1955 {
1956 Dist_value_pt[i] = matrix_value_pt[i];
1957 }
1958
1959 // Copy column indices
1960 Dist_index_pt = new int[nnz_local];
1961 int* matrix_index_pt = cr_matrix_pt->column_index();
1962 for (int i = 0; i < nnz_local; i++)
1963 {
1964 Dist_index_pt[i] = matrix_index_pt[i];
1965 }
1966
1967 // Copy row starts
1968 int nrow_local = cr_matrix_pt->nrow_local();
1969 Dist_start_pt = new int[nrow_local + 1];
1970 int* matrix_start_pt = cr_matrix_pt->row_start();
1971 for (int i = 0; i <= nrow_local; i++)
1972 {
1973 Dist_start_pt[i] = matrix_start_pt[i];
1974 }
1975
1976 // cache
1977 int ndof = cr_matrix_pt->distribution_pt()->nrow();
1978 int first_row = cr_matrix_pt->first_row();
1979
1980 // Now delete the matrix if we are allowed
1981 if (Dist_delete_matrix_data == true)
1982 {
1983 cr_matrix_pt->clear();
1984 }
1985
1986 // Factorize
1988 1,
1989 allow_permutations,
1990 ndof,
1991 nnz_local,
1992 nrow_local,
1993 first_row,
1997 0,
1998 Dist_nprow,
1999 Dist_npcol,
2000 doc,
2002 &Dist_info,
2003 this->distribution_pt()->communicator_pt()->mpi_comm());
2004
2005 // Record that data is stored
2007 }
2008 // else the CRDoubleMatrix is not distributed
2009 else
2010 {
2011 // Find the number of non-zero entries in the matrix
2012 const int nnz = int(cr_matrix_pt->nnz());
2013
2014 // cache the number of rows
2015 int nrow = cr_matrix_pt->nrow();
2016
2017 // Set up the pointers to the matrix.
2018 // NOTE: these arrays (accessed via value_pt, index_pt and
2019 // start_pt) may be modified by the SuperLU_DIST routines, and so
2020 // a copy must be taken if the matrix is to be preserved.
2021
2022 // create the corresponing cc matrix
2024 nrow,
2025 nnz,
2026 cr_matrix_pt->value(),
2027 cr_matrix_pt->column_index(),
2028 cr_matrix_pt->row_start(),
2031 &Dist_start_pt);
2032
2033 // Delete the matrix if we are allowed
2034 if (Dist_delete_matrix_data == true)
2035 {
2036 cr_matrix_pt->clear();
2037 }
2038
2039 // do the factorization
2041 1,
2042 allow_permutations,
2043 nrow,
2044 nnz,
2048 0,
2049 Dist_nprow,
2050 Dist_npcol,
2051 doc,
2053 &Dist_info,
2054 this->distribution_pt()->communicator_pt()->mpi_comm());
2055
2056 // Record that data is stored
2058 }
2059 }
2060
2061 // Or is it a CCDoubleMatrix?
2062 else if (dynamic_cast<CCDoubleMatrix*>(matrix_pt))
2063 {
2064 // Get a cast pointer to the matrix
2065 CCDoubleMatrix* serial_matrix_pt =
2066 dynamic_cast<CCDoubleMatrix*>(matrix_pt);
2067
2068 // Find the number of non-zero entries in the matrix
2069 const int nnz = int(serial_matrix_pt->nnz());
2070
2071 // Find # of degrees of freedom (variables)
2072 int ndof = int(serial_matrix_pt->nrow());
2073
2074 // Find the local number of degrees of freedom in the linear system
2075 int ndof_local = ndof;
2076
2077 // Set up the pointers to the matrix.
2078 // NOTE: these arrays (accessed via value_pt, index_pt and
2079 // start_pt) may be modified by the SuperLU_DIST routines, and so
2080 // a copy must be taken if the matrix is to be preserved.
2081
2082 // Copy values
2083 Dist_value_pt = new double[nnz];
2084 double* matrix_value_pt = serial_matrix_pt->value();
2085 for (int i = 0; i < nnz; i++)
2086 {
2087 Dist_value_pt[i] = matrix_value_pt[i];
2088 }
2089
2090 // copy row indices
2091 Dist_index_pt = new int[nnz];
2092 int* matrix_index_pt = serial_matrix_pt->row_index();
2093 for (int i = 0; i < nnz; i++)
2094 {
2095 Dist_index_pt[i] = matrix_index_pt[i];
2096 }
2097
2098 // copy column starts
2099 Dist_start_pt = new int[ndof_local + 1];
2100 int* matrix_start_pt = serial_matrix_pt->column_start();
2101 for (int i = 0; i <= ndof_local; i++)
2102 {
2103 Dist_start_pt[i] = matrix_start_pt[i];
2104 }
2105
2106 // Delete the matrix if we are allowed
2107 if (Dist_delete_matrix_data == true)
2108 {
2109 serial_matrix_pt->clean_up_memory();
2110 }
2111
2112 // do the factorization
2114 1,
2115 allow_permutations,
2116 ndof,
2117 nnz,
2121 0,
2122 Dist_nprow,
2123 Dist_npcol,
2124 doc,
2126 &Dist_info,
2127 this->distribution_pt()->communicator_pt()->mpi_comm());
2128
2129 // Record that data is stored
2131 }
2132 // Otherwise throw an error
2133 else
2134 {
2135 std::ostringstream error_message_stream;
2136 error_message_stream << "SuperLUSolver implemented only for "
2137 << " CCDoubleMatrix, CRDoubleMatrix\n"
2138 << "and DistributedCRDoubleMatrix matrices\n";
2139 throw OomphLibError(error_message_stream.str(),
2140 OOMPH_CURRENT_FUNCTION,
2141 OOMPH_EXCEPTION_LOCATION);
2142 }
2143
2144 // Throw an error if superLU returned an error status in info.
2145 if (Dist_info != 0)
2146 {
2147 std::ostringstream error_msg;
2148 error_msg << "SuperLU returned the error status code " << Dist_info
2149 << " . See the SuperLU documentation for what this means.";
2150 throw OomphLibError(
2151 error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2152 }
2153 }
2154#endif
2155
2156 //===================================================================
2157 /// LU decompose the matrix addressed by matrix_pt by using
2158 /// the SuperLU solver. The resulting matrix factors are stored
2159 /// internally.
2160 //===================================================================
2162 {
2163#ifdef PARANOID
2164 // PARANOID check that if the matrix is distributable then it should not be
2165 // then it should not be distributed
2166 if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
2167 {
2168 if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
2169 ->distributed())
2170 {
2171 std::ostringstream error_message_stream;
2172 error_message_stream << "The matrix must not be distributed.";
2173 throw OomphLibError(error_message_stream.str(),
2174 OOMPH_CURRENT_FUNCTION,
2175 OOMPH_EXCEPTION_LOCATION);
2176 }
2177 }
2178#endif
2179
2180 // Find # of degrees of freedom (variables)
2181 int n = matrix_pt->nrow();
2182
2183 // Check that we have a square matrix
2184#ifdef PARANOID
2185 int m = matrix_pt->ncol();
2186 if (n != m)
2187 {
2188 std::ostringstream error_message_stream;
2189 error_message_stream << "Can only solve for square matrices\n"
2190 << "N, M " << n << " " << m << std::endl;
2191
2192 throw OomphLibError(error_message_stream.str(),
2193 OOMPH_CURRENT_FUNCTION,
2194 OOMPH_EXCEPTION_LOCATION);
2195 }
2196#endif
2197
2198 // Storage for the values, rows and column indices
2199 // required by SuplerLU
2200 double* value = 0;
2201 int *index = 0, *start = 0;
2202
2203 // Integer used to represent compressed row or column format
2204 // Default compressed row
2205 int transpose = 0;
2206
2207 // Number of non-zero entries in the matrix
2208 int nnz = 0;
2209
2210 // Doc flag (convert to int for SuperLU)
2211 int doc = Doc_stats;
2212
2213 // Is it a CR matrix
2214 if (dynamic_cast<CRDoubleMatrix*>(matrix_pt))
2215 {
2216 // Set the appropriate row flags
2218 transpose = 1;
2219 // Get a cast pointer to the matrix
2220 CRDoubleMatrix* CR_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
2221
2222 // Now set the pointers to the interanally stored values
2223 // and indices
2224 nnz = CR_matrix_pt->nnz();
2225 value = CR_matrix_pt->value();
2226 index = CR_matrix_pt->column_index();
2227 start = CR_matrix_pt->row_start();
2228 }
2229 // Otherwise is it the compressed column version?
2230 else if (dynamic_cast<CCDoubleMatrix*>(matrix_pt))
2231 {
2232 // Set the compressed row flag to false
2234 // Get a cast pointer to the matrix
2235 CCDoubleMatrix* CC_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
2236
2237 // Now set the pointers to the interanally stored values
2238 // and indices
2239 nnz = CC_matrix_pt->nnz();
2240 value = CC_matrix_pt->value();
2241 index = CC_matrix_pt->row_index();
2242 start = CC_matrix_pt->column_start();
2243 }
2244 // Otherwise throw and error
2245 else
2246 {
2247 throw OomphLibError("SuperLU only works with CR or CC Double matrices",
2248 OOMPH_CURRENT_FUNCTION,
2249 OOMPH_EXCEPTION_LOCATION);
2250 }
2251
2252 // Clean up any previous storage so that if this is called twice with
2253 // the same matrix, we don't get a memory leak
2255
2256 // Perform the lu decompose phase (i=1)
2257 int i = 1;
2259 &n,
2260 &nnz,
2261 0,
2262 value,
2263 index,
2264 start,
2265 0,
2266 &n,
2267 &transpose,
2268 &doc,
2270 &Serial_info);
2271
2272 // Throw an error if superLU returned an error status in info.
2273 if (Serial_info != 0)
2274 {
2275 std::ostringstream error_msg;
2276 error_msg << "SuperLU returned the error status code " << Serial_info
2277 << " . See the SuperLU documentation for what this means.";
2278 throw OomphLibError(
2279 error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2280 }
2281
2282
2283 // Set the number of degrees of freedom in the linear system
2284 Serial_n_dof = n;
2285 }
2286
2287 //=============================================================================
2288 /// Do the backsubstitution for SuperLUSolver.
2289 /// Note - this method performs no paranoid checks - these are all performed
2290 /// in solve(...) and resolve(...)
2291 //=============================================================================
2293 {
2294#ifdef OOMPH_HAS_MPI
2295 if (Using_dist)
2296 {
2297 backsub_distributed(rhs, result);
2298 }
2299 else
2300#endif
2301 {
2302 backsub_serial(rhs, result);
2303 }
2304 }
2305
2306
2307 //=============================================================================
2308 /// Do the backsubstitution of the transposed system for SuperLUSolver.
2309 /// Note - this method performs no paranoid checks - these are all performed
2310 /// in solve(...) and resolve(...)
2311 //=============================================================================
2313 DoubleVector& result)
2314 {
2315#ifdef OOMPH_HAS_MPI
2316 if (Using_dist)
2317 {
2318 backsub_transpose_distributed(rhs, result);
2319 }
2320 else
2321#endif
2322 {
2323 backsub_transpose_serial(rhs, result);
2324 }
2325 }
2326
2327#ifdef OOMPH_HAS_MPI
2328 //=========================================================================
2329 /// Static warning to suppress warnings about incorrect distribution of
2330 /// RHS vector. Default is false
2331 //=========================================================================
2333 false;
2334
2335 //=============================================================================
2336 /// Do the backsubstitution for SuperLU solver.
2337 /// Note - this method performs no paranoid checks - these are all performed
2338 /// in solve(...) and resolve(...)
2339 //=============================================================================
2341 DoubleVector& result)
2342 {
2343#ifdef PARANOID
2344 // check that the rhs vector is setup
2345 if (!rhs.distribution_pt()->built())
2346 {
2347 std::ostringstream error_message_stream;
2348 error_message_stream << "The vectors rhs must be setup";
2349 throw OomphLibError(error_message_stream.str(),
2350 OOMPH_CURRENT_FUNCTION,
2351 OOMPH_EXCEPTION_LOCATION);
2352 }
2353#endif
2354 // check that the rhs distribution is the same as the distribution as this
2355 // solver. If not redistribute and issue a warning
2356 LinearAlgebraDistribution rhs_distribution(rhs.distribution_pt());
2357 if (!(*rhs.distribution_pt() == *this->distribution_pt()))
2358 {
2360 {
2361 std::ostringstream warning_stream;
2362 warning_stream << "The distribution of rhs vector does not match that "
2363 "ofthe solver.\n";
2364 warning_stream << "The rhs will be redistributed, which is likely to "
2365 "be inefficient\n";
2366 warning_stream
2367 << "To remove this warning you can either:\n"
2368 << " i) Ensure that the rhs vector has the correct distribution\n"
2369 << " before calling the resolve() function\n"
2370 << "or ii) Set the flag \n"
2371 << " SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_"
2372 "resolve\n"
2373 << " to be true\n\n";
2374
2375 OomphLibWarning(warning_stream.str(),
2376 "SuperLUSolver::resolve()",
2377 OOMPH_EXCEPTION_LOCATION);
2378 }
2379
2380 // Have to cast away const-ness (which tells us that we shouldn't really
2381 // be doing this!)
2382 const_cast<DoubleVector&>(rhs).redistribute(this->distribution_pt());
2383 }
2384
2385#ifdef PARANOID
2386 // if the result vector is setup then check it has the same distribution
2387 // as the rhs
2388 if (result.distribution_built())
2389 {
2390 if (!(*result.distribution_pt() == *rhs.distribution_pt()))
2391 {
2392 std::ostringstream error_message_stream;
2393 error_message_stream
2394 << "The result vector distribution has been setup; it must have the "
2395 << "same distribution as the rhs vector.";
2396 throw OomphLibError(error_message_stream.str(),
2397 OOMPH_CURRENT_FUNCTION,
2398 OOMPH_EXCEPTION_LOCATION);
2399 }
2400 }
2401#endif
2402 // Doc (0/1) = (true/false)
2403 int doc = !Doc_stats;
2404
2405 // Reset Info
2406 Dist_info = 0;
2407
2408 // number of DOFs
2409 int ndof = this->distribution_pt()->nrow();
2410
2411 // Copy the rhs values to result
2412 result = rhs;
2413
2414 // Do the backsubsitition phase
2416 {
2417 // Call distributed solver
2419 2,
2420 -1,
2421 ndof,
2422 0,
2423 0,
2424 0,
2425 0,
2426 0,
2427 0,
2428 result.values_pt(),
2429 Dist_nprow,
2430 Dist_npcol,
2431 doc,
2433 &Dist_info,
2434 this->distribution_pt()->communicator_pt()->mpi_comm());
2435 }
2437 {
2438 // Call global solver
2440 2,
2441 -1,
2442 ndof,
2443 0,
2444 0,
2445 0,
2446 0,
2447 result.values_pt(),
2448 Dist_nprow,
2449 Dist_npcol,
2450 doc,
2452 &Dist_info,
2453 this->distribution_pt()->communicator_pt()->mpi_comm());
2454 }
2455 else
2456 {
2457 throw OomphLibError("The matrix factors have not been stored",
2458 OOMPH_CURRENT_FUNCTION,
2459 OOMPH_EXCEPTION_LOCATION);
2460 }
2461
2462 // Throw an error if superLU returned an error status in info.
2463 if (Dist_info != 0)
2464 {
2465 std::ostringstream error_msg;
2466 error_msg << "SuperLU returned the error status code " << Dist_info
2467 << " . See the SuperLU documentation for what this means.";
2468 throw OomphLibError(
2469 error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2470 }
2471
2472 // Redistribute to original distribution
2473 // Have to cast away const-ness (which tells us that we shouldn't really
2474 // be doing this!)
2475 const_cast<DoubleVector&>(rhs).redistribute(&rhs_distribution);
2476 }
2477
2478 //=============================================================================
2479 /// Do the backsubstitution for SuperLU solver.
2480 /// Note - this method performs no paranoid checks - these are all performed
2481 /// in solve(...) and resolve(...)
2482 //=============================================================================
2484 DoubleVector& result)
2485 {
2486 // Create an output stream
2487 std::ostringstream error_message_stream;
2488
2489 // Create the error message
2490 error_message_stream << "This function hasn't been implemented yet. If you "
2491 << "need it, implement it!" << std::endl;
2492
2493 // Throw the error message
2494 throw OomphLibError(error_message_stream.str(),
2495 OOMPH_CURRENT_FUNCTION,
2496 OOMPH_EXCEPTION_LOCATION);
2497 }
2498#endif
2499
2500 //================================================================
2501 /// Do the backsubstitution for SuperLU
2502 //================================================================
2504 DoubleVector& result)
2505 {
2506 // Find the number of unknowns
2507 int n = rhs.nrow();
2508
2509#ifdef PARANOID
2510 // PARANOID check that this rhs distribution is setup
2511 if (!rhs.built())
2512 {
2513 std::ostringstream error_message_stream;
2514 error_message_stream << "The rhs vector distribution must be setup.";
2515 throw OomphLibError(error_message_stream.str(),
2516 OOMPH_CURRENT_FUNCTION,
2517 OOMPH_EXCEPTION_LOCATION);
2518 }
2519 // PARANOID check that the rhs has the right number of global rows
2520 if (static_cast<int>(Serial_n_dof) != n)
2521 {
2522 throw OomphLibError(
2523 "RHS does not have the same dimension as the linear system",
2524 OOMPH_CURRENT_FUNCTION,
2525 OOMPH_EXCEPTION_LOCATION);
2526 }
2527 // PARANOID check that the rhs is not distributed
2528 if (rhs.distribution_pt()->distributed())
2529 {
2530 std::ostringstream error_message_stream;
2531 error_message_stream << "The rhs vector must not be distributed.";
2532 throw OomphLibError(error_message_stream.str(),
2533 OOMPH_CURRENT_FUNCTION,
2534 OOMPH_EXCEPTION_LOCATION);
2535 }
2536 // PARANOID check that if the result is setup it matches the distribution
2537 // of the rhs
2538 if (result.built())
2539 {
2540 if (!(*rhs.distribution_pt() == *result.distribution_pt()))
2541 {
2542 std::ostringstream error_message_stream;
2543 error_message_stream << "If the result distribution is setup then it "
2544 "must be the same as the "
2545 << "rhs distribution";
2546 throw OomphLibError(error_message_stream.str(),
2547 OOMPH_CURRENT_FUNCTION,
2548 OOMPH_EXCEPTION_LOCATION);
2549 }
2550 }
2551#endif
2552
2553 // copy result to rhs
2554 result = rhs;
2555
2556 // Number of RHSs
2557 int nrhs = 1;
2558
2559 // Cast the boolean flags to ints for SuperLU
2560 int transpose = Serial_compressed_row_flag;
2561 int doc = Doc_stats;
2562
2563 // Do the backsubsitition phase
2564 int i = 2;
2565 superlu(&i,
2566 &n,
2567 0,
2568 &nrhs,
2569 0,
2570 0,
2571 0,
2572 result.values_pt(),
2573 &n,
2574 &transpose,
2575 &doc,
2577 &Serial_info);
2578
2579 // Throw an error if superLU returned an error status in info.
2580 if (Serial_info != 0)
2581 {
2582 std::ostringstream error_msg;
2583 error_msg << "SuperLU returned the error status code " << Serial_info
2584 << " . See the SuperLU documentation for what this means.";
2585 throw OomphLibError(
2586 error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2587 }
2588 }
2589
2590 //================================================================
2591 /// Do the backsubstitution for SuperLU
2592 //================================================================
2594 DoubleVector& result)
2595 {
2596 // Find the number of unknowns
2597 int n = rhs.nrow();
2598
2599#ifdef PARANOID
2600 // PARANOID check that this rhs distribution is setup
2601 if (!rhs.built())
2602 {
2603 std::ostringstream error_message_stream;
2604 error_message_stream << "The rhs vector distribution must be setup.";
2605 throw OomphLibError(error_message_stream.str(),
2606 OOMPH_CURRENT_FUNCTION,
2607 OOMPH_EXCEPTION_LOCATION);
2608 }
2609 // PARANOID check that the rhs has the right number of global rows
2610 if (static_cast<int>(Serial_n_dof) != n)
2611 {
2612 throw OomphLibError(
2613 "RHS does not have the same dimension as the linear system",
2614 OOMPH_CURRENT_FUNCTION,
2615 OOMPH_EXCEPTION_LOCATION);
2616 }
2617 // PARANOID check that the rhs is not distributed
2618 if (rhs.distribution_pt()->distributed())
2619 {
2620 std::ostringstream error_message_stream;
2621 error_message_stream << "The rhs vector must not be distributed.";
2622 throw OomphLibError(error_message_stream.str(),
2623 OOMPH_CURRENT_FUNCTION,
2624 OOMPH_EXCEPTION_LOCATION);
2625 }
2626 // PARANOID check that if the result is setup it matches the distribution
2627 // of the rhs
2628 if (result.built())
2629 {
2630 if (!(*rhs.distribution_pt() == *result.distribution_pt()))
2631 {
2632 std::ostringstream error_message_stream;
2633 error_message_stream << "If the result distribution is setup then it "
2634 "must be the same as the "
2635 << "rhs distribution";
2636 throw OomphLibError(error_message_stream.str(),
2637 OOMPH_CURRENT_FUNCTION,
2638 OOMPH_EXCEPTION_LOCATION);
2639 }
2640 }
2641#endif
2642
2643 // copy result to rhs
2644 result = rhs;
2645
2646 // Number of RHSs
2647 int nrhs = 1;
2648
2649 // Cast the boolean flags to ints for SuperLU
2650 int transpose = (!Serial_compressed_row_flag);
2651 int doc = Doc_stats;
2652
2653 // Do the backsubsitition phase
2654 int i = 2;
2655 superlu(&i,
2656 &n,
2657 0,
2658 &nrhs,
2659 0,
2660 0,
2661 0,
2662 result.values_pt(),
2663 &n,
2664 &transpose,
2665 &doc,
2667 &Serial_info);
2668
2669 // Throw an error if superLU returned an error status in info.
2670 if (Serial_info != 0)
2671 {
2672 std::ostringstream error_msg;
2673 error_msg << "SuperLU returned the error status code " << Serial_info
2674 << " . See the SuperLU documentation for what this means.";
2675 throw OomphLibError(
2676 error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2677 }
2678 }
2679
2680 //=============================================================================
2681 /// Clean up the memory
2682 //=============================================================================
2684 {
2685 // If we have non-zero LU factors stored
2686 if (Serial_f_factors != 0)
2687 {
2688 // Clean up those factors
2689 int i = 3;
2690 int transpose = Serial_compressed_row_flag;
2691 superlu(&i,
2692 0,
2693 0,
2694 0,
2695 0,
2696 0,
2697 0,
2698 0,
2699 0,
2700 &transpose,
2701 0,
2703 &Serial_info);
2704
2705 // Set the F_factors to zero
2706 Serial_f_factors = 0;
2707 Serial_n_dof = 0;
2708 }
2709
2710#ifdef OOMPH_HAS_MPI
2711 // If we have non-zero LU factors stored
2712 if (Dist_solver_data_pt != 0)
2713 {
2714 // Clean up any stored solver data
2715
2716 // Doc (0/1) = (true/false)
2717 int doc = !Doc_stats;
2718
2719 // Reset Info flag
2720 Dist_info = 0;
2721
2722 // number of DOFs
2723 int ndof = this->distribution_pt()->nrow();
2724
2726 {
2728 3,
2729 -1,
2730 ndof,
2731 0,
2732 0,
2733 0,
2734 0,
2735 0,
2736 0,
2737 0,
2738 Dist_nprow,
2739 Dist_npcol,
2740 doc,
2742 &Dist_info,
2743 this->distribution_pt()->communicator_pt()->mpi_comm());
2745 }
2747 {
2749 3,
2750 -1,
2751 ndof,
2752 0,
2753 0,
2754 0,
2755 0,
2756 0,
2757 Dist_nprow,
2758 Dist_npcol,
2759 doc,
2761 &Dist_info,
2762 this->distribution_pt()->communicator_pt()->mpi_comm());
2764 }
2765
2767
2768 // Delete internal copy of the matrix
2769 delete[] Dist_value_pt;
2770 delete[] Dist_index_pt;
2771 delete[] Dist_start_pt;
2772 Dist_value_pt = 0;
2773 Dist_index_pt = 0;
2774 Dist_start_pt = 0;
2775
2776 // and the distribution
2777 this->clear_distribution();
2778 }
2779#endif
2780 }
2781
2782} // namespace oomph
cstr elem_len * i
Definition: cfortran.h:603
//////////////////////////////////////////////////////////////// ////////////////////////////////////...
Definition: matrices.h:2791
void multiply_transpose(const DoubleVector &x, DoubleVector &soln) const
Multiply the transposed matrix by the vector x: soln=A^T x.
Definition: matrices.cc:715
unsigned long nrow() const
Return the number of rows of the matrix.
Definition: matrices.h:2817
int * column_start()
Access to C-style column_start array.
Definition: matrices.h:2692
int * row_index()
Access to C-style row index array.
Definition: matrices.h:2704
void clean_up_memory()
Wipe matrix data and set all values to 0.
Definition: matrices.h:3169
A class for compressed row matrices. This is a distributable object.
Definition: matrices.h:888
int * row_start()
Access to C-style row_start array.
Definition: matrices.h:1060
void multiply_transpose(const DoubleVector &x, DoubleVector &soln) const
Multiply the transposed matrix by the vector x: soln=A^T x.
Definition: matrices.cc:1882
int * column_index()
Access to C-style column index array.
Definition: matrices.h:1072
unsigned long nnz() const
Return the number of nonzero entries (the local nnz)
Definition: matrices.h:1096
bool built() const
access function to the Built flag - indicates whether the matrix has been build - i....
Definition: matrices.h:1210
double * value()
Access to C-style value array.
Definition: matrices.h:1084
unsigned long nrow() const
Return the number of rows of the matrix.
Definition: matrices.h:1002
Class of matrices containing doubles, and stored as a DenseMatrix<double>, but with solving functiona...
Definition: matrices.h:1271
double Jacobian_setup_time
Jacobian setup time.
void backsub(const DoubleVector &rhs, DoubleVector &result)
Do the backsubstitution step to solve the system LU result = rhs.
void clean_up_memory()
Clean up the stored LU factors.
long * Index
Pointer to storage for the index of permutations in the LU solve.
void factorise(DoubleMatrixBase *const &matrix_pt)
Perform the LU decomposition of the matrix.
double * LU_factors
Pointer to storage for the LU decomposition.
double Solution_time
Solution time.
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
int Sign_of_determinant_of_matrix
Sign of the determinant of the matrix (obtained during the LU decomposition)
Base class for any linear algebra object that is distributable. Just contains storage for the LinearA...
void clear_distribution()
clear the distribution of this distributable linear algebra object
bool distributed() const
distribution is serial or distributed
unsigned nrow() const
access function to the number of global rows.
bool distribution_built() const
if the communicator_pt is null then the distribution is not setup then false is returned,...
unsigned nrow_local() const
access function for the num of local rows on this processor.
unsigned first_row() const
access function for the first row on this processor
LinearAlgebraDistribution * distribution_pt() const
access to the LinearAlgebraDistribution
void build_distribution(const LinearAlgebraDistribution *const dist_pt)
setup the distribution of this distributable linear algebra object
Abstract base class for matrices of doubles – adds abstract interfaces for solving,...
Definition: matrices.h:261
virtual unsigned long ncol() const =0
Return the number of columns of the matrix.
virtual unsigned long nrow() const =0
Return the number of rows of the matrix.
A vector in the mathematical sense, initially developed for linear algebra type applications....
Definition: double_vector.h:58
void build(const DoubleVector &old_vector)
Just copys the argument DoubleVector.
void redistribute(const LinearAlgebraDistribution *const &dist_pt)
The contents of the vector are redistributed to match the new distribution. In a non-MPI rebuild this...
bool built() const
double * values_pt()
access function to the underlying values
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
Describes the distribution of a distributable linear algebra type object. Typically this is a contain...
bool distributed() const
access function to the distributed - indicates whether the distribution is serial or distributed
OomphCommunicator * communicator_pt() const
const access to the communicator pointer
bool built() const
if the communicator_pt is null then the distribution is not setup then false is returned,...
unsigned nrow() const
access function to the number of global rows.
bool Doc_time
Boolean flag that indicates whether the time taken.
Definition: linear_solver.h:77
bool Enable_resolve
Boolean that indicates whether the matrix (or its factors, in the case of direct solver) should be st...
Definition: linear_solver.h:73
bool Gradient_has_been_computed
flag that indicates whether the gradient was computed or not
Definition: linear_solver.h:84
DoubleVector Gradient_for_glob_conv_newton_solve
DoubleVector storing the gradient for the globally convergent Newton method.
Definition: linear_solver.h:88
bool Compute_gradient
flag that indicates whether the gradient required for the globally convergent Newton method should be...
Definition: linear_solver.h:81
static bool mpi_has_been_initialised()
return true if MPI has been initialised
static OomphCommunicator * communicator_pt()
access to global communicator. This is the oomph-lib equivalent of MPI_COMM_WORLD
An oomph-lib wrapper to the MPI_Comm communicator object. Just contains an MPI_Comm object (which is ...
Definition: communicator.h:54
An OomphLibError object which should be thrown when an run-time error is encountered....
An OomphLibWarning object which should be created as a temporary object to issue a warning....
////////////////////////////////////////////////////////////////// //////////////////////////////////...
Definition: problem.h:151
unsigned long ndof() const
Return the number of dofs.
Definition: problem.h:1674
OomphCommunicator * communicator_pt()
access function to the oomph-lib communicator
Definition: problem.h:1246
virtual void get_jacobian(DoubleVector &residuals, DenseDoubleMatrix &jacobian)
Return the fully-assembled Jacobian and residuals for the problem Interface for the case when the Jac...
Definition: problem.cc:3890
void get_fd_jacobian(DoubleVector &residuals, DenseMatrix< double > &jacobian)
Return the fully-assembled Jacobian and residuals, generated by finite differences.
Definition: problem.cc:7823
int & sign_of_jacobian()
Access function for the sign of the global jacobian matrix. This will be set by the linear solver,...
Definition: problem.h:2424
T * value()
Access to C-style value array.
Definition: matrices.h:616
unsigned long nnz() const
Return the number of nonzero entries.
Definition: matrices.h:640
bool Doc_stats
Set to true to output statistics (false by default).
bool Dist_delete_matrix_data
Delete_matrix_data flag. SuperLU_dist needs its own copy of the input matrix, therefore a copy must b...
void solve_transpose(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
static bool Suppress_incorrect_rhs_distribution_warning_in_resolve
Static flag that determines whether the warning about incorrect distribution of RHSs will be printed ...
int * Dist_index_pt
Pointer for storage of matrix rows or column indices required by SuperLU_DIST.
Type Solver_type
the solver type. see SuperLU_solver_type for details.
void backsub_serial(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU (serial)
void backsub_transpose_distributed(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU Dist
double Solution_time
Solution time.
double * Dist_value_pt
Pointer for storage of the matrix values required by SuperLU_DIST.
bool Serial_compressed_row_flag
Use compressed row version?
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
void factorise(DoubleMatrixBase *const &matrix_pt)
Do the factorisation stage Note: if Delete_matrix_data is true the function matrix_pt->clean_up_memor...
void factorise_serial(DoubleMatrixBase *const &matrix_pt)
factorise method for SuperLU (serial)
void backsub_distributed(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU Dist
bool Using_dist
boolean flag indicating whether superlu dist is being used
int * Dist_start_pt
Pointers for storage of matrix column or row starts.
unsigned long Serial_n_dof
The number of unknowns in the linear system.
void * Serial_f_factors
Storage for the LU factors as required by SuperLU.
void resolve_transpose(const DoubleVector &rhs, DoubleVector &result)
Resolve the (transposed) system defined by the last assembled Jacobian and the specified rhs vector i...
bool Suppress_solve
Suppress solve?
int Serial_sign_of_determinant_of_matrix
Sign of the determinant of the matrix.
bool Dist_use_global_solver
Flag that determines whether the MPIProblem based solve function uses the global or distributed versi...
double get_memory_usage_for_lu_factors()
How much memory do the LU factors take up? In bytes.
void factorise_distributed(DoubleMatrixBase *const &matrix_pt)
factorise method for SuperLU Dist
bool Dist_allow_row_and_col_permutations
If true then SuperLU_DIST is allowed to permute matrix rows and columns during factorisation....
int Dist_info
Info flag for the SuperLU solver.
bool Dist_global_solve_data_allocated
Flag is true if solve data has been generated for a global matrix.
double get_total_needed_memory()
How much memory was allocated by SuperLU? In bytes.
double Jacobian_setup_time
Jacobian setup time.
void backsub_transpose_serial(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU (serial)
void resolve(const DoubleVector &rhs, DoubleVector &result)
Resolve the system defined by the last assembled jacobian and the specified rhs vector if resolve has...
void * Dist_solver_data_pt
Storage for the LU factors and other data required by SuperLU.
bool Dist_distributed_solve_data_allocated
Flag is true if solve data has been generated for distributed matrix.
int Dist_npcol
Number of columns for the process grid.
void backsub(const DoubleVector &rhs, DoubleVector &result)
Do the backsubstitution for SuperLU solver Note: returns the global result Vector.
void clean_up_memory()
Clean up the memory allocated by the solver.
int Dist_nprow
Number of rows for the process grid.
void backsub_transpose(const DoubleVector &rhs, DoubleVector &result)
Do the back-substitution for transposed system of the SuperLU solver Note: Returns the global result ...
int Serial_info
Info flag for the SuperLU solver.
void start(const unsigned &i)
(Re-)start i-th timer
double timer()
returns the time in seconds after some point in past
std::string convert_secs_to_formatted_string(const double &time_in_sec)
Returns a nicely formatted string from an input time in seconds; the format depends on the size of ti...
//////////////////////////////////////////////////////////////////// ////////////////////////////////...
double get_total_memory_usage_in_bytes()
Function to calculate the number of bytes used in calculating and storing the LU factors.
double get_lu_factor_memory_usage_in_bytes_dist()
Function to calculate the number of bytes used to store the LU factors.
double get_total_memory_usage_in_bytes_dist()
Function to calculate the number of bytes used in calculating and storing the LU factors.
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n, int nnz, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
int superlu(int *, int *, int *, int *, double *, int *, int *, double *, int *, int *, int *, void *, int *)
OomphInfo oomph_info
Single (global) instantiation of the OomphInfo object – this is used throughout the library as a "rep...
double get_lu_factor_memory_usage_in_bytes()
Function to calculate the number of bytes used to store the LU factors.
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)