oomph-lib: linear_solver.cc Source File

Go to the documentation of this file.
 // LIC// ====================================================================
 // LIC// This file forms part of oomph-lib, the object-oriented,
 // LIC// multi-physics finite-element library, available
 // LIC// at http://www.oomph-lib.org.
 // LIC//
 // LIC// Copyright (C) 2006-2024 Matthias Heil and Andrew Hazel
 // LIC//
 // LIC// This library is free software; you can redistribute it and/or
 // LIC// modify it under the terms of the GNU Lesser General Public
 // LIC// License as published by the Free Software Foundation; either
 // LIC// version 2.1 of the License, or (at your option) any later version.
 // LIC//
 // LIC// This library is distributed in the hope that it will be useful,
 // LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
 // LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // LIC// Lesser General Public License for more details.
 // LIC//
 // LIC// You should have received a copy of the GNU Lesser General Public
 // LIC// License along with this library; if not, write to the Free Software
 // LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 // LIC// 02110-1301  USA.
 // LIC//
 // LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
 // LIC//
 // LIC//====================================================================
 // The actual solve functions for dense LU linear solvers.
  
 // Config header generated by autoconfig
 #ifdef HAVE_CONFIG_H
 #include <oomph-lib-config.h>
 #endif
  
 #ifdef OOMPH_HAS_MPI
 #include "mpi.h"
 #endif
  
 // oomph-lib includes
 #include "Vector.h"
 #include "linear_solver.h"
 #include "matrices.h"
 #include "problem.h"
  
  
 namespace oomph
 {
   //=============================================================================
   /// Solver: Takes pointer to problem and returns the results Vector
   /// which contains the solution of the linear system defined by
   /// the problem's fully assembled Jacobian and residual Vector.
   //=============================================================================
   void DenseLU::solve(Problem* const& problem_pt, DoubleVector& result)
   {
     // Initialise timer
     double t_start = TimingHelpers::timer();
  
     // Find # of degrees of freedom (variables)
     const unsigned n_dof = problem_pt->ndof();
  
     // Allocate storage for the residuals vector and the jacobian matrix
     DoubleVector residuals;
     DenseDoubleMatrix jacobian(n_dof);
  
     // initialise timer
     double t_start_jacobian = TimingHelpers::timer();
  
     // Get the full jacobian and residuals of the problem
     problem_pt->get_jacobian(residuals, jacobian);
  
     // compute jacobian setup time
     double t_end_jacobian = TimingHelpers::timer();
     Jacobian_setup_time = t_end_jacobian - t_start_jacobian;
  
     // Report the time
     if (Doc_time)
     {
       oomph_info << std::endl
                  << "CPU for setup of Dense Jacobian: "
                  << TimingHelpers::convert_secs_to_formatted_string(
                       Jacobian_setup_time)
                  << std::endl;
     }
  
     // Solve by dense LU decomposition VERY INEFFICIENT!
     solve(&jacobian, residuals, result);
  
     // Set the sign of the determinant of the jacobian
     problem_pt->sign_of_jacobian() = Sign_of_determinant_of_matrix;
  
     // Finalise/doc timings
     double t_end = TimingHelpers::timer();
     double total_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info << "CPU for DenseLU LinearSolver: "
                  << TimingHelpers::convert_secs_to_formatted_string(total_time)
                  << std::endl
                  << std::endl;
     }
   }
  
  
   //=============================================================================
   /// Delete the storage that has been allocated for the LU factors, if
   /// the matrix data is not itself being overwritten.
   //=============================================================================
   void DenseLU::clean_up_memory()
   {
     // delete the Distribution_pt
     this->clear_distribution();
  
     // Clean up the LU factor storage, if it has been allocated
     // N.B. we don't need to check the index storage as well.
     if (LU_factors != 0)
     {
       // Delete the pointer to the LU factors
       delete[] LU_factors;
       // Null out the vector
       LU_factors = 0;
       // Delete the pointer to the Index
       delete[] Index;
       // Null out
       Index = 0;
     }
   }
  
   //=============================================================================
   /// LU decompose the matrix.
   /// WARNING: this class does not perform any PARANOID checks on the vectors -
   /// these are all performed in the solve(...) method.
   //=============================================================================
   void DenseLU::factorise(DoubleMatrixBase* const& matrix_pt)
   {
     // Set the number of unknowns
     const unsigned long n = matrix_pt->nrow();
  
     // Small constant
     const double small_number = 1.0e-20;
  
     // Vector scaling stores the implicit scaling of each row
     Vector<double> scaling(n);
  
     // Integer to store the sign that must multiply the determinant as
     // a consequence of the row/column interchanges
     int signature = 1;
  
     // Loop over rows to get implicit scaling information
     for (unsigned long i = 0; i < n; i++)
     {
       double largest_entry = 0.0;
       for (unsigned long j = 0; j < n; j++)
       {
         double tmp = std::fabs((*matrix_pt)(i, j));
         if (tmp > largest_entry) largest_entry = tmp;
       }
       if (largest_entry == 0.0)
       {
         throw OomphLibError(
           "Singular Matrix", OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
       }
       // Save the scaling
       scaling[i] = 1.0 / largest_entry;
     }
  
     // Firsly, we shall delete any previous LU storage.
     // If the user calls this function twice without changing the matrix
     // then it is their own inefficiency, not ours (this time).
     clean_up_memory();
  
     // Allocate storage for the LU factors, the index and store
     // the number of unknowns
     LU_factors = new double[n * n];
     Index = new long[n];
  
     // Now we know that memory has been allocated, copy over
     // the matrix values
     unsigned count = 0;
     for (unsigned long i = 0; i < n; i++)
     {
       for (unsigned long j = 0; j < n; j++)
       {
         LU_factors[count] = (*matrix_pt)(i, j);
         ++count;
       }
     }
  
     // Loop over columns
     for (unsigned long j = 0; j < n; j++)
     {
       // Initialise imax
       unsigned long imax = 0;
  
       for (unsigned long i = 0; i < j; i++)
       {
         double sum = LU_factors[n * i + j];
         for (unsigned long k = 0; k < i; k++)
         {
           sum -= LU_factors[n * i + k] * LU_factors[n * k + j];
         }
         LU_factors[n * i + j] = sum;
       }
  
       // Initialise search for largest pivot element
       double largest_entry = 0.0;
       for (unsigned long i = j; i < n; i++)
       {
         double sum = LU_factors[n * i + j];
         for (unsigned long k = 0; k < j; k++)
         {
           sum -= LU_factors[n * i + k] * LU_factors[n * k + j];
         }
         LU_factors[n * i + j] = sum;
         // Set temporary
         double tmp = scaling[i] * std::fabs(sum);
         if (tmp >= largest_entry)
         {
           largest_entry = tmp;
           imax = i;
         }
       }
  
       // Test to see if we need to interchange rows
       if (j != imax)
       {
         for (unsigned long k = 0; k < n; k++)
         {
           double tmp = LU_factors[n * imax + k];
           LU_factors[n * imax + k] = LU_factors[n * j + k];
           LU_factors[n * j + k] = tmp;
         }
         // Change the parity of signature
         signature = -signature;
  
         // Interchange scale factor
         scaling[imax] = scaling[j];
       }
  
       // Set the index
       Index[j] = imax;
       if (LU_factors[n * j + j] == 0.0)
       {
         LU_factors[n * j + j] = small_number;
       }
       // Divide by pivot element
       if (j != n - 1)
       {
         double tmp = 1.0 / LU_factors[n * j + j];
         for (unsigned long i = j + 1; i < n; i++)
         {
           LU_factors[n * i + j] *= tmp;
         }
       }
  
     } // End of loop over columns
  
  
     // Now multiply all the diagonal terms together to get the determinant
     // Note that we need to use the mantissa, exponent formulation to
     // avoid underflow errors
     double determinant_mantissa = 1.0;
     int determinant_exponent = 0, iexp;
     for (unsigned i = 0; i < n; i++)
     {
       // Multiply by the next diagonal entry's mantissa
       // and return the exponent
       determinant_mantissa *= frexp(LU_factors[n * i + i], &iexp);
  
       // Add the new exponent to the current exponent
       determinant_exponent += iexp;
  
       // normalise
       determinant_mantissa = frexp(determinant_mantissa, &iexp);
       determinant_exponent += iexp;
     }
  
     // If paranoid issue a warning that the matrix is near singular
     // #ifdef PARANOID
     //  int tiny_exponent = -60;
     //  if(determinant_exponent < tiny_exponent)
     //   {
     //    std::ostringstream warning_stream;
     //    warning_stream << "The determinant of the matrix is very close to
     //    zero.\n"
     //                   << "It is " << determinant_mantissa << " x 2^"
     //                   << determinant_exponent << "\n";
     //    warning_stream << "The results will depend on the exact details of
     //    the\n"
     //                   << "floating point implementation ... just to let you
     //                   know\n";
     //    OomphLibWarning(warning_stream.str(),
     //                    "DenseLU::factorise()",
     //                    OOMPH_EXCEPTION_LOCATION);
     //   }
     // #endif
  
     // Integer to store the sign of the determinant
     int sign = 0;
  
     // Find the sign of the determinant
     if (determinant_mantissa > 0.0)
     {
       sign = 1;
     }
     if (determinant_mantissa < 0.0)
     {
       sign = -1;
     }
  
     // Multiply the sign by the signature
     sign *= signature;
  
     // Return the sign of the determinant
     Sign_of_determinant_of_matrix = sign;
   }
  
   //=============================================================================
   /// Do the backsubstitution for the DenseLU solver.
   /// WARNING: this class does not perform any PARANOID checks on the vectors -
   /// these are all performed in the solve(...) method.
   //=============================================================================
   void DenseLU::backsub(const DoubleVector& rhs, DoubleVector& result)
   {
     // Get pointers to first entries
     const double* rhs_pt = rhs.values_pt();
     double* result_pt = result.values_pt();
  
     // Copy the rhs vector into the result vector
     const unsigned long n = rhs.nrow();
     for (unsigned long i = 0; i < n; ++i)
     {
       result_pt[i] = rhs_pt[i];
     }
  
     // Loop over all rows for forward substition
     unsigned long k = 0;
     for (unsigned long i = 0; i < n; i++)
     {
       unsigned long ip = Index[i];
       double sum = result_pt[ip];
       result_pt[ip] = result_pt[i];
       if (k != 0)
       {
         for (unsigned long j = k - 1; j < i; j++)
         {
           sum -= LU_factors[n * i + j] * result_pt[j];
         }
       }
       else if (sum != 0.0)
       {
         k = i + 1;
       }
       result_pt[i] = sum;
     }
  
     // Now do the back substitution
     for (long i = long(n) - 1; i >= 0; i--)
     {
       double sum = result_pt[i];
       for (long j = i + 1; j < long(n); j++)
       {
         sum -= LU_factors[n * i + j] * result_pt[j];
       }
       result_pt[i] = sum / LU_factors[n * i + i];
     }
   }
  
   //=============================================================================
   /// Do the backsubstitution for the DenseLU solver.
   /// WARNING: this class does not perform any PARANOID checks on the vectors -
   /// these are all performed in the solve(...) method. So, if you call backsub
   /// directly, you have been warned...
   //=============================================================================
   void DenseLU::backsub(const Vector<double>& rhs, Vector<double>& result)
   {
     // Copy the rhs vector into the result vector
     const unsigned long n = rhs.size();
     for (unsigned long i = 0; i < n; ++i)
     {
       result[i] = rhs[i];
     }
  
     // Loop over all rows for forward substition
     unsigned long k = 0;
     for (unsigned long i = 0; i < n; i++)
     {
       unsigned long ip = Index[i];
       double sum = result[ip];
       result[ip] = result[i];
       if (k != 0)
       {
         for (unsigned long j = k - 1; j < i; j++)
         {
           sum -= LU_factors[n * i + j] * result[j];
         }
       }
       else if (sum != 0.0)
       {
         k = i + 1;
       }
       result[i] = sum;
     }
  
     // Now do the back substitution
     for (long i = long(n) - 1; i >= 0; i--)
     {
       double sum = result[i];
       for (long j = i + 1; j < long(n); j++)
       {
         sum -= LU_factors[n * i + j] * result[j];
       }
       result[i] = sum / LU_factors[n * i + i];
     }
   }
  
  
   //=============================================================================
   /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
   /// vector and returns the solution of the linear system.
   //============================================================================
   void DenseLU::solve(DoubleMatrixBase* const& matrix_pt,
                       const DoubleVector& rhs,
                       DoubleVector& result)
   {
 #ifdef PARANOID
     // check that the rhs vector is not distributed
     if (rhs.distribution_pt()->distributed())
     {
       std::ostringstream error_message_stream;
       error_message_stream
         << "The vectors rhs and result must not be distributed";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // check that the matrix is square
     if (matrix_pt->nrow() != matrix_pt->ncol())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The matrix at matrix_pt must be square.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
     // check that the matrix and the rhs vector have the same nrow()
     if (matrix_pt->nrow() != rhs.nrow())
     {
       std::ostringstream error_message_stream;
       error_message_stream
         << "The matrix and the rhs vector must have the same number of rows.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // if the matrix is distributable then it too should have the same
     // communicator as the rhs vector and should not be distributed
     DistributableLinearAlgebraObject* dist_matrix_pt =
       dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
     if (dist_matrix_pt != 0)
     {
       if (dist_matrix_pt->distribution_pt()->communicator_pt()->nproc() > 1 &&
           dist_matrix_pt->distribution_pt()->distributed() == true)
       {
         throw OomphLibError(
           "Matrix must not be distributed or only one processor",
           OOMPH_CURRENT_FUNCTION,
           OOMPH_EXCEPTION_LOCATION);
       }
       OomphCommunicator temp_comm(*rhs.distribution_pt()->communicator_pt());
       if (!(temp_comm == *dist_matrix_pt->distribution_pt()->communicator_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The matrix matrix_pt must have the same communicator as the "
              "vectors"
           << " rhs and result must have the same communicator";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
     // if the result vector is setup then check it is not distributed and has
     // the same communicator as the rhs vector
     if (result.distribution_built())
     {
       if (!(*result.distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The result vector distribution has been setup; it must have the "
           << "same distribution as the rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     if (!result.distribution_built())
     {
       result.build(rhs.distribution_pt(), 0.0);
     }
  
     // set the distribution
     this->build_distribution(rhs.distribution_pt());
  
     // Time the solver
     double t_start = TimingHelpers::timer();
  
     // factorise
     factorise(matrix_pt);
  
     // backsubstitute
     backsub(rhs, result);
  
     // Doc time for solver
     double t_end = TimingHelpers::timer();
  
     Solution_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info << std::endl
                  << "CPU for solve with DenseLU   : "
                  << TimingHelpers::convert_secs_to_formatted_string(
                       Solution_time)
                  << std::endl
                  << std::endl;
     }
  
     // If we are not resolving then delete storage
     if (!Enable_resolve)
     {
       clean_up_memory();
     }
   }
  
   //=============================================================================
   /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
   /// vector and returns the solution of the linear system.
   //=============================================================================
   void DenseLU::solve(DoubleMatrixBase* const& matrix_pt,
                       const Vector<double>& rhs,
                       Vector<double>& result)
   {
     // Time the solver
     clock_t t_start = clock();
  
     factorise(matrix_pt);
     backsub(rhs, result);
  
     // Doc time for solver
     clock_t t_end = clock();
  
     Solution_time = double(t_end - t_start) / CLOCKS_PER_SEC;
     if (Doc_time)
     {
       oomph_info << "CPU for solve with DenseLU   : "
                  << TimingHelpers::convert_secs_to_formatted_string(
                       Solution_time)
                  << std::endl;
     }
  
     // If we are not resolving then delete storage
     if (!Enable_resolve)
     {
       clean_up_memory();
     }
   }
  
   //==================================================================
   /// Solver: Takes pointer to problem and returns the results Vector
   /// which contains the solution of the linear system defined by
   /// the problem's residual Vector. (Jacobian assembled by FD).
   //==================================================================
   void FD_LU::solve(Problem* const& problem_pt, DoubleVector& result)
   {
     // Initialise timer
     clock_t t_start = clock();
  
 #ifdef PARANOID
     // if the result vector is setup then check it is not distributed and has
     // the same communicator as the rhs vector
     if (result.built())
     {
       if (result.distributed())
       {
         std::ostringstream error_message_stream;
         error_message_stream << "The result vector must not be distributed";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // Find # of degrees of freedom
     unsigned long n_dof = problem_pt->ndof();
  
     // Allocate storage for the residuals vector and the jacobian matrix
     DoubleVector residuals;
     DenseDoubleMatrix jacobian(n_dof);
  
     {
       // initialise timer
       clock_t t_start = clock();
  
       // Get the full jacobian by finite differencing)  VERY INEFFICIENT!
       problem_pt->get_fd_jacobian(residuals, jacobian);
  
       // compute jacobian setup time
       clock_t t_end = clock();
       Jacobian_setup_time = double(t_end - t_start) / CLOCKS_PER_SEC;
  
       // Report the time
       if (Doc_time)
       {
         oomph_info << std::endl
                    << "CPU for setup of Dense Jacobian: "
                    << TimingHelpers::convert_secs_to_formatted_string(
                         Jacobian_setup_time)
                    << std::endl
                    << std::endl;
       }
     }
  
     // Solve by dense LU decomposition (not efficient)
     solve(&jacobian, residuals, result);
  
     // Set the sign of the determinant of the jacobian
     problem_pt->sign_of_jacobian() = Sign_of_determinant_of_matrix;
  
     // Finalise/doc timings
     clock_t t_end = clock();
     double total_time = double(t_end - t_start) / CLOCKS_PER_SEC;
     if (Doc_time)
     {
       oomph_info << "CPU for FD DenseLU LinearSolver: "
                  << TimingHelpers::convert_secs_to_formatted_string(total_time)
                  << std::endl
                  << std::endl;
     }
   }
  
  
   //===================================================================
   // Interface to SuperLU wrapper: declaration (with C linkage) of the
   // wrapper's driver routine. The routine is only declared here; its
   // definition is not part of the visible code.
   // NOTE(review): the parameters are unnamed in this declaration, so
   // their meaning has to be taken from the wrapper's definition --
   // TODO confirm and consider naming them here.
   //===================================================================
   extern "C"
   {
     int superlu(int*,
                 int*,
                 int*,
                 int*,
                 double*,
                 int*,
                 int*,
                 double*,
                 int*,
                 int*,
                 int*,
                 void*,
                 int*);
   }
  
  
 #ifdef OOMPH_HAS_MPI
   //===================================================================
   // Interface to SuperLU_DIST wrapper: declarations (with C linkage) of
   // the wrapper routines used when oomph-lib is compiled with MPI
   // support. The routines are only declared here.
   //===================================================================
   extern "C"
   {
     // Interface to distributed SuperLU solver where each processor
     // holds the entire matrix. The sparse matrix is passed via its
     // values, row indices and column starts; b is the right-hand side
     // array and comm the MPI communicator.
     // NOTE(review): the precise meaning of opt_flag, nprow/npcol, data
     // and info is defined by the wrapper -- confirm there.
     void superlu_dist_global_matrix(int opt_flag,
                                     int allow_permutations,
                                     int n,
                                     int nnz,
                                     double* values,
                                     int* row_index,
                                     int* col_start,
                                     double* b,
                                     int nprow,
                                     int npcol,
                                     int doc,
                                     void** data,
                                     int* info,
                                     MPI_Comm comm);
  
     // Interface to distributed SuperLU solver where each processor
     // holds part of the matrix. The local part of the sparse matrix is
     // passed via its values, column indices and row starts, together
     // with the number of local rows/non-zeroes and the first row held.
     void superlu_dist_distributed_matrix(int opt_flag,
                                          int allow_permutations,
                                          int n,
                                          int nnz_local,
                                          int nrow_local,
                                          int first_row,
                                          double* values,
                                          int* col_index,
                                          int* row_start,
                                          double* b,
                                          int nprow,
                                          int npcol,
                                          int doc,
                                          void** data,
                                          int* info,
                                          MPI_Comm comm);
  
     // helper method - just calls the superlu method dCompRow_to_CompCol to
     // convert the c-style vectors of a compressed-row (cr) matrix to those
     // of a compressed-column (cc) matrix. The cc arrays are returned via
     // the pointer-to-pointer arguments.
     // NOTE(review): presumably the cc arrays are allocated by the callee;
     // confirm who is responsible for freeing them.
     void superlu_cr_to_cc(int nrow,
                           int ncol,
                           int nnz,
                           double* cr_values,
                           int* cr_index,
                           int* cr_start,
                           double** cc_values,
                           int** cc_index,
                           int** cc_start);
   }
 #endif
  
  
   //===================================================================
   // Interface to SuperLU wrapper extras: declarations (with C linkage)
   // of the wrapper's memory-statistics routines. They are only
   // declared here and are used by SuperLUSolver's memory-usage queries.
   //===================================================================
   extern "C"
   {
     /// Function to calculate the number of bytes used to store the
     /// LU factors (returned as a double)
     double get_lu_factor_memory_usage_in_bytes();
  
     /// Function to calculate the number of bytes used in calculating
     /// and storing the LU factors (returned as a double)
     double get_total_memory_usage_in_bytes();
   }
  
 #ifdef OOMPH_HAS_MPI
   //===================================================================
   // Interface to SuperLU_DIST wrapper extras: declarations (with C
   // linkage) of the distributed wrapper's memory-statistics routines.
   // They are only declared here and are only available when oomph-lib
   // is compiled with MPI support.
   //===================================================================
   extern "C"
   {
     /// Function to calculate the number of bytes used to store the
     /// LU factors (returned as a double)
     double get_lu_factor_memory_usage_in_bytes_dist();
  
     /// Function to calculate the number of bytes used in calculating
     /// and storing the LU factors (returned as a double)
     double get_total_memory_usage_in_bytes_dist();
   }
 #endif
  
   //=============================================================================
   /// How much memory do the LU factors take up? In bytes
   /// NOTE: This has been scraped from dQuerySpace(...) in dmemory.c in
   ///                 external_src/oomph_superlu_4.3
   //=============================================================================
   double SuperLUSolver::get_memory_usage_for_lu_factors()
   {
     // If we're using the non-distributed version of SuperLU and the LU
     // factors have also been computed
     if ((Solver_type != Distributed) && (Serial_f_factors != 0))
     {
       return get_lu_factor_memory_usage_in_bytes();
     }
 #ifdef OOMPH_HAS_MPI
     // If we're using SuperLU dist and the LU factors have been computed
     if ((Solver_type == Distributed) && (Dist_solver_data_pt != 0))
     {
       return get_lu_factor_memory_usage_in_bytes_dist();
     }
 #endif
     // If the factors haven't been computed we can't do anything
     else
     {
       return 0.0;
     }
   } // End of get_memory_usage_for_lu_factors
  
  
   //=============================================================================
   /// How much memory was used in total? In bytes
   /// NOTE: This has been scraped from dQuerySpace(...) in dmemory.c in
   ///                 external_src/oomph_superlu_4.3
   //=============================================================================
   double SuperLUSolver::get_total_needed_memory()
   {
     // If we're using the non-distributed version of SuperLU and the LU
     // factors have also been computed
     if ((Solver_type != Distributed) && (Serial_f_factors != 0))
     {
       return get_total_memory_usage_in_bytes();
     }
 #ifdef OOMPH_HAS_MPI
     // If we're using SuperLU dist and the LU factors have been computed
     if ((Solver_type == Distributed) && (Dist_solver_data_pt != 0))
     {
       return get_total_memory_usage_in_bytes_dist();
     }
 #endif
     // If the factors haven't been computed we can't do anything
     else
     {
       return 0.0;
     }
   } // End of get_total_needed_memory
  
  
   //==========================================================================
   /// Solver: Takes pointer to problem and returns the results Vector
   /// which contains the solution of the linear system defined by
   /// the problem's fully assembled Jacobian and residual Vector.
   //==========================================================================
   void SuperLUSolver::solve(Problem* const& problem_pt, DoubleVector& result)
   {
     // wipe memory
     this->clean_up_memory();
  
 #ifdef OOMPH_HAS_MPI
     // USING SUPERLU DIST
     /// //////////////////
     if (Solver_type == Distributed ||
         (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
     {
       // init the timers
       double t_start = TimingHelpers::timer();
  
       // number of dofs
       unsigned n_dof = problem_pt->ndof();
  
       // set the distribution
       LinearAlgebraDistribution dist(
         problem_pt->communicator_pt(), n_dof, !Dist_use_global_solver);
       this->build_distribution(dist);
  
       // Take a copy of Delete_matrix_data
       bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
  
       // Set Delete_matrix to true
       Dist_delete_matrix_data = true;
  
       // Use the distributed version of SuperLU_DIST?
       if (!Dist_use_global_solver)
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Storage for the residuals vector
         DoubleVector residuals(this->distribution_pt(), 0.0);
  
         // Get the sparse jacobian and residuals of the problem
         CRDoubleMatrix jacobian(this->distribution_pt());
         problem_pt->get_jacobian(residuals, jacobian);
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "Time to set up CRDoubleMatrix Jacobian         : "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the distribution of the result has been built and
           // does not match that of
           // the solver then redistribute before the solve and return
           // to the incoming distribution afterwards.
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve(&jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           else
           {
             solve(&jacobian, residuals, result);
           }
         }
       }
       // Otherwise its the global solve version
       else
       {
         // Storage for the residuals vector
         // A non-distributed residuals vector
         LinearAlgebraDistribution dist(
           problem_pt->communicator_pt(), problem_pt->ndof(), false);
         DoubleVector residuals(&dist, 0.0);
         CRDoubleMatrix jacobian(&dist);
  
         // Get the sparse jacobian and residuals of the problem
         problem_pt->get_jacobian(residuals, jacobian);
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "Time to set up CR Jacobian    : "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result distribution has been built and
           // does not match the global distribution
           // the redistribute before the solve and then return to the
           // distributed version afterwards
           if ((result.built()) && (!(*result.distribution_pt() == dist)))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(&dist, 0.0);
             solve(&jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           else
           {
             solve(&jacobian, residuals, result);
           }
         }
       }
       // Set Delete_matrix back to original value
       Dist_delete_matrix_data = copy_of_Delete_matrix_data;
     }
  
     // OTHERWISE WE ARE USING SUPERLU (SERIAL)
     /// ///////////////////////////////////////
     else
 #endif
     {
       // set the solver distribution
       LinearAlgebraDistribution dist(
         problem_pt->communicator_pt(), problem_pt->ndof(), false);
       this->build_distribution(dist);
  
       // Allocate storage for the residuals vector
       DoubleVector residuals(dist, 0.0);
  
       // Use the compressed row version?
       if (Serial_compressed_row_flag)
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Get the sparse jacobian and residuals of the problem
         CRDoubleMatrix CR_jacobian(this->distribution_pt());
         problem_pt->get_jacobian(residuals, CR_jacobian);
  
         // If we want to compute the gradient for the globally convergent
         // Newton method, then do it here
         if (Compute_gradient)
         {
           // Compute it
           CR_jacobian.multiply_transpose(residuals,
                                          Gradient_for_glob_conv_newton_solve);
           // Set the flag
           Gradient_has_been_computed = true;
         }
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << std::endl
                      << "Time to set up CRDoubleMatrix Jacobian : "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result vector is built and distributed
           // then need to redistribute into the same form as the
           // RHS (non-distributed)
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve(&CR_jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           // Otherwise just solve
           else
           {
             solve(&CR_jacobian, residuals, result);
           }
         }
       }
       // Otherwise its the compressed column version
       else
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Get the sparse jacobian and residuals of the problem
         CCDoubleMatrix CC_jacobian;
         problem_pt->get_jacobian(residuals, CC_jacobian);
  
         // If we want to compute the gradient for the globally convergent
         // Newton method, then do it here
         if (Compute_gradient)
         {
           // Compute it
           CC_jacobian.multiply_transpose(residuals,
                                          Gradient_for_glob_conv_newton_solve);
           // Set the flag
           Gradient_has_been_computed = true;
         }
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "\nTime to set up CCDoubleMatrix Jacobian: "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result vector is built and distributed
           // then need to redistribute into the same form as the
           // RHS
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve(&CC_jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           // Otherwise just solve
           else
           {
             solve(&CC_jacobian, residuals, result);
           }
         }
       }
  
       // Set the sign of the jacobian
       //(this is computed in the LU decomposition phase)
       problem_pt->sign_of_jacobian() = Serial_sign_of_determinant_of_matrix;
     }
   }
  
   //=========================================================================
   /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
   /// vector and returns the solution of the linear system. Problem pointer
   /// defaults to NULL and can be omitted. The function returns the global
   /// result Vector.
   /// Note: if Delete_matrix_data is true the function
   /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
   //=========================================================================
   void SuperLUSolver::solve(DoubleMatrixBase* const& matrix_pt,
                             const DoubleVector& rhs,
                             DoubleVector& result)
   {
     // Initialise timer
     double t_start = TimingHelpers::timer();
  
     // Pointer used in various places
     CRDoubleMatrix* cr_pt = 0;
  
  
 #ifdef PARANOID
     // check that the rhs vector is setup
     if (!rhs.built())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The vectors rhs must be setup";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // check that the matrix is square
     if (matrix_pt->nrow() != matrix_pt->ncol())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The matrix at matrix_pt must be square.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // check that the matrix has some entries, and so has a values_pt that
     // makes sense (only for CR because CC is never used I think dense
     // matrices will be safe since they don't use a values pointer).
     cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
     if (cr_pt != 0)
     {
       if (cr_pt->nnz() == 0)
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
           << "SuperLU would segfault (because the values array pt is "
           << "uninitialised or null).";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
  
     // check that the matrix and the rhs vector have the same nrow()
     if (matrix_pt->nrow() != rhs.nrow())
     {
       std::ostringstream error_message_stream;
       error_message_stream
         << "The matrix and the rhs vector must have the same number of rows.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // if the matrix is distributable then should have the same distribution
     // as the rhs vector
     DistributableLinearAlgebraObject* dist_matrix_pt =
       dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
     if (dist_matrix_pt != 0)
     {
       if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The matrix matrix_pt must have the same distribution as the "
           << "rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
     // if the matrix is not distributable then it the rhs vector should not be
     // distributed
     else
     {
       if (rhs.distribution_pt()->distributed())
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The matrix (matrix_pt) is not distributable and therefore the rhs"
           << " vector must not be distributed";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
     // if the result vector is setup then check it has the same distribution
     // as the rhs
     if (result.built())
     {
       if (!(*result.distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The result vector distribution has been setup; it must have the "
           << "same distribution as the rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // set the distribution
     if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
     {
       // the solver has the same distribution as the matrix if possible
       this->build_distribution(
         dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
           ->distribution_pt());
     }
     else
     {
       // the solver has the same distribution as the RHS
       this->build_distribution(rhs.distribution_pt());
     }
  
     // Doc time for solve
     double t_factorise_start = TimingHelpers::timer();
  
     // Factorise the matrix
     factorise(matrix_pt);
  
     // Doc the end time
     double t_factorise_end = TimingHelpers::timer();
  
     // How long did the factorisation take?
     double factorise_time = t_factorise_end - t_factorise_start;
  
     // Try and upcast the matrix to a CRDoubleMatrix
     // CRDoubleMatrix*
     cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
  
     // If the input matrix is a CRDoubleMatrix
     if (cr_pt != 0)
     {
       // ...and actually has an entry
       if (cr_pt->nnz() != 0)
       {
         // Find out how many rows there are in the global Jacobian
         unsigned n_row = cr_pt->nrow();
  
         // And how many non-zeros there are in the global Jacobian
         unsigned n_nnz = cr_pt->nnz();
  
         // Get the memory usage (in bytes) for the global Jacobian storage
         double memory_usage_for_jacobian =
           ((2 * ((n_row + 1) * sizeof(int))) +
            (n_nnz * (sizeof(int) + sizeof(double))));
  
         // Get the memory usage (in bytes) for storage of the LU factors in
         // SuperLU
         double memory_usage_for_lu_storage = get_total_needed_memory();
  
         // Get the memory usage (in bytes) for storage of the LU factors in
         // SuperLU
         double total_memory_usage =
           memory_usage_for_jacobian + memory_usage_for_lu_storage;
  
  
         // How much memory have we used?
         if (Doc_stats)
         {
           oomph_info << "\nMemory statistics:"
                      << "\n - Memory used to store the Jacobian (MB): "
                      << memory_usage_for_jacobian / 1.0e+06
                      << "\n - Memory used to store the LU factors (MB): "
                      << memory_usage_for_lu_storage / 1.0e+06
                      << "\n - Total memory used for matrix storage (MB): "
                      << total_memory_usage / 1.0e+06 << "\n"
                      << std::endl;
         }
       }
     } // if (cr_pt!=0)
  
     // Doc the start time
     double t_backsub_start = TimingHelpers::timer();
  
     // Now do the back solve
     backsub(rhs, result);
  
     // Doc the end time
     double t_backsub_end = TimingHelpers::timer();
  
     // How long did the back substitution take?
     double backsub_time = t_backsub_end - t_backsub_start;
  
     // Doc time for solve
     double t_end = TimingHelpers::timer();
     Solution_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info
         << "Time for LU factorisation : "
         << TimingHelpers::convert_secs_to_formatted_string(factorise_time)
         << "\nTime for back-substitution: "
         << TimingHelpers::convert_secs_to_formatted_string(backsub_time)
         << "\nTime for SuperLUSolver solve (ndof=" << matrix_pt->nrow() << "): "
         << TimingHelpers::convert_secs_to_formatted_string(Solution_time)
         << std::endl;
     }
  
     // If we are not storing the solver data for resolves, delete it
     if (!Enable_resolve)
     {
       clean_up_memory();
     }
   }
  
  
   //=============================================================================
   /// Solver: Takes pointer to problem and returns the results Vector
   /// which contains the solution of the linear system defined by
   /// the problem's fully assembled Jacobian and residual Vector.
   //=============================================================================
   void SuperLUSolver::solve_transpose(Problem* const& problem_pt,
                                       DoubleVector& result)
   {
     // wipe memory
     this->clean_up_memory();
  
 #ifdef OOMPH_HAS_MPI
     // USING SUPERLU DIST
     /// //////////////////
     if (Solver_type == Distributed ||
         (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
     {
       // init the timers
       double t_start = TimingHelpers::timer();
  
       // number of dofs
       unsigned n_dof = problem_pt->ndof();
  
       // set the distribution
       LinearAlgebraDistribution dist(
         problem_pt->communicator_pt(), n_dof, !Dist_use_global_solver);
       this->build_distribution(dist);
  
       // Take a copy of Delete_matrix_data
       bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
  
       // Set Delete_matrix to true
       Dist_delete_matrix_data = true;
  
       // Use the distributed version of SuperLU_DIST?
       if (!Dist_use_global_solver)
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Storage for the residuals vector
         DoubleVector residuals(this->distribution_pt(), 0.0);
  
         // Get the sparse jacobian and residuals of the problem
         CRDoubleMatrix jacobian(this->distribution_pt());
         problem_pt->get_jacobian(residuals, jacobian);
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "Time to set up CRDoubleMatrix Jacobian         : "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the distribution of the result has been build and
           // does not match that of
           // the solver then redistribute before the solve and return
           // to the incoming distribution afterwards.
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve_transpose(&jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           else
           {
             solve_transpose(&jacobian, residuals, result);
           }
         }
       }
       // Otherwise its the global solve version
       else
       {
         // Storage for the residuals vector
         // A non-distriubted residuals vector
         LinearAlgebraDistribution dist(
           problem_pt->communicator_pt(), problem_pt->ndof(), false);
         DoubleVector residuals(&dist, 0.0);
         CRDoubleMatrix jacobian(&dist);
  
         // Get the sparse jacobian and residuals of the problem
         problem_pt->get_jacobian(residuals, jacobian);
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "Time to set up CR Jacobian    : "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result distribution has been built and
           // does not match the global distribution
           // the redistribute before the solve and then return to the
           // distributed version afterwards
           if ((result.built()) && (!(*result.distribution_pt() == dist)))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(&dist, 0.0);
             solve_transpose(&jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           else
           {
             solve_transpose(&jacobian, residuals, result);
           }
         }
       }
       // Set Delete_matrix back to original value
       Dist_delete_matrix_data = copy_of_Delete_matrix_data;
     }
  
     // OTHERWISE WE ARE USING SUPERLU (SERIAL)
     /// ///////////////////////////////////////
     else
 #endif
     {
       // set the solver distribution
       LinearAlgebraDistribution dist(
         problem_pt->communicator_pt(), problem_pt->ndof(), false);
       this->build_distribution(dist);
  
       // Allocate storage for the residuals vector
       DoubleVector residuals(dist, 0.0);
  
       // Use the compressed row version?
       if (Serial_compressed_row_flag)
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Get the sparse jacobian and residuals of the problem
         CRDoubleMatrix CR_jacobian(this->distribution_pt());
         problem_pt->get_jacobian(residuals, CR_jacobian);
  
         // If we want to compute the gradient for the globally convergent
         // Newton method, then do it here
         if (Compute_gradient)
         {
           // Compute it
           CR_jacobian.multiply_transpose(residuals,
                                          Gradient_for_glob_conv_newton_solve);
           // Set the flag
           Gradient_has_been_computed = true;
         }
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << std::endl
                      << "Time to set up CRDoubleMatrix Jacobian: "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result vector is built and distributed
           // then need to redistribute into the same form as the
           // RHS (non-distributed)
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve_transpose(&CR_jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           // Otherwise just solve
           else
           {
             solve_transpose(&CR_jacobian, residuals, result);
           }
         }
       }
       // Otherwise its the compressed column version
       else
       {
         // Initialise timer
         double t_start = TimingHelpers::timer();
  
         // Get the sparse jacobian and residuals of the problem
         CCDoubleMatrix CC_jacobian;
         problem_pt->get_jacobian(residuals, CC_jacobian);
  
         // If we want to compute the gradient for the globally convergent
         // Newton method, then do it here
         if (Compute_gradient)
         {
           // Compute it
           CC_jacobian.multiply_transpose(residuals,
                                          Gradient_for_glob_conv_newton_solve);
           // Set the flag
           Gradient_has_been_computed = true;
         }
  
         // Doc time for setup
         double t_end = TimingHelpers::timer();
         Jacobian_setup_time = t_end - t_start;
         if (Doc_time)
         {
           oomph_info << "\nTime to set up CCDoubleMatrix Jacobian: "
                      << TimingHelpers::convert_secs_to_formatted_string(
                           Jacobian_setup_time)
                      << std::endl;
         }
  
         // Now call the linear algebra solve, if desired
         if (!Suppress_solve)
         {
           // If the result vector is built and distributed
           // then need to redistribute into the same form as the
           // RHS
           if ((result.built()) &&
               (!(*result.distribution_pt() == *this->distribution_pt())))
           {
             LinearAlgebraDistribution temp_global_dist(
               result.distribution_pt());
             result.build(this->distribution_pt(), 0.0);
             solve_transpose(&CC_jacobian, residuals, result);
             result.redistribute(&temp_global_dist);
           }
           // Otherwise just solve
           else
           {
             solve_transpose(&CC_jacobian, residuals, result);
           }
         }
       }
  
       // Set the sign of the jacobian
       //(this is computed in the LU decomposition phase)
       problem_pt->sign_of_jacobian() = Serial_sign_of_determinant_of_matrix;
     }
   }
  
   //=========================================================================
   /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
   /// vector and returns the solution of the linear system. Problem pointer
   /// defaults to NULL and can be omitted. The function returns the global
   /// result Vector.
   /// Note: if Delete_matrix_data is true the function
   /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
   //=========================================================================
   void SuperLUSolver::solve_transpose(DoubleMatrixBase* const& matrix_pt,
                                       const DoubleVector& rhs,
                                       DoubleVector& result)
   {
     // Initialise timer
     double t_start = TimingHelpers::timer();
  
     // Pointer used in various places
     CRDoubleMatrix* cr_pt = 0;
  
 #ifdef PARANOID
     // check that the rhs vector is setup
     if (!rhs.built())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The vectors rhs must be setup";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // check that the matrix is square
     if (matrix_pt->nrow() != matrix_pt->ncol())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The matrix at matrix_pt must be square.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // check that the matrix has some entries, and so has a values_pt that
     // makes sense (only for CR because CC is never used I think dense
     // matrices will be safe since they don't use a values pointer).
     cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
     if (cr_pt != 0)
     {
       if (cr_pt->nnz() == 0)
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
           << "SuperLU would segfault (because the values array pt is "
           << "uninitialised or null).";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
  
     // check that the matrix and the rhs vector have the same nrow()
     if (matrix_pt->nrow() != rhs.nrow())
     {
       std::ostringstream error_message_stream;
       error_message_stream
         << "The matrix and the rhs vector must have the same number of rows.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // if the matrix is distributable then should have the same distribution
     // as the rhs vector
     DistributableLinearAlgebraObject* dist_matrix_pt =
       dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
     if (dist_matrix_pt != 0)
     {
       if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The matrix matrix_pt must have the same distribution as the "
           << "rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
     // if the matrix is not distributable then it the rhs vector should not be
     // distributed
     else
     {
       if (rhs.distribution_pt()->distributed())
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The matrix (matrix_pt) is not distributable and therefore the rhs"
           << " vector must not be distributed";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
     // if the result vector is setup then check it has the same distribution
     // as the rhs
     if (result.built())
     {
       if (!(*result.distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The result vector distribution has been setup; it must have the "
           << "same distribution as the rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // set the distribution
     if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
     {
       // the solver has the same distribution as the matrix if possible
       this->build_distribution(
         dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
           ->distribution_pt());
     }
     else
     {
       // the solver has the same distribution as the RHS
       this->build_distribution(rhs.distribution_pt());
     }
  
     // Doc time for solve
     double t_factorise_start = TimingHelpers::timer();
  
     // Factorise the matrix
     factorise(matrix_pt);
  
     // Doc the end time
     double t_factorise_end = TimingHelpers::timer();
  
     // How long did the factorisation take?
     double factorise_time = t_factorise_end - t_factorise_start;
  
     // Try and upcast the matrix to a CRDoubleMatrix
     // CRDoubleMatrix*
     cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
  
     // If the input matrix is a CRDoubleMatrix
     if (cr_pt != 0)
     {
       // ...and actually has an entry
       if (cr_pt->nnz() != 0)
       {
         // Find out how many rows there are in the global Jacobian
         unsigned n_row = cr_pt->nrow();
  
         // And how many non-zeros there are in the global Jacobian
         unsigned n_nnz = cr_pt->nnz();
  
         // Get the memory usage (in bytes) for the global Jacobian storage
         double memory_usage_for_jacobian =
           ((2 * ((n_row + 1) * sizeof(int))) +
            (n_nnz * (sizeof(int) + sizeof(double))));
  
         // Get the memory usage (in bytes) for storage of the LU factors in
         // SuperLU
         double memory_usage_for_lu_storage = get_total_needed_memory();
  
         // Get the memory usage (in bytes) for storage of the LU factors in
         // SuperLU
         double total_memory_usage =
           memory_usage_for_jacobian + memory_usage_for_lu_storage;
  
         // How much memory have we used?
         if (Doc_stats)
         {
           oomph_info << "\nMemory statistics:"
                      << "\n - Memory used to store the Jacobian (MB): "
                      << memory_usage_for_jacobian / 1.0e+06
                      << "\n - Memory used to store the LU factors (MB): "
                      << memory_usage_for_lu_storage / 1.0e+06
                      << "\n - Total memory used for matrix storage (MB): "
                      << total_memory_usage / 1.0e+06 << "\n"
                      << std::endl;
         }
       }
     } // if (cr_pt!=0)
  
     // Doc the start time
     double t_backsub_start = TimingHelpers::timer();
  
     // Now do the back solve
     backsub_transpose(rhs, result);
  
     // Doc the end time
     double t_backsub_end = TimingHelpers::timer();
  
     // How long did the back substitution take?
     double backsub_time = t_backsub_end - t_backsub_start;
  
     // Doc time for solve
     double t_end = TimingHelpers::timer();
     Solution_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info
         << "Time for LU factorisation : "
         << TimingHelpers::convert_secs_to_formatted_string(factorise_time)
         << "\nTime for back-substitution: "
         << TimingHelpers::convert_secs_to_formatted_string(backsub_time)
         << "\nTime for SuperLUSolver solve (ndof=" << matrix_pt->nrow() << "): "
         << TimingHelpers::convert_secs_to_formatted_string(Solution_time)
         << std::endl;
     }
  
     // If we are not storing the solver data for resolves, delete it
     if (!Enable_resolve)
     {
       clean_up_memory();
     }
   } // End of solve_transpose
  
   //===============================================================
   /// Resolve the system for a given RHS
   //===============================================================
   void SuperLUSolver::resolve(const DoubleVector& rhs, DoubleVector& result)
   {
     // Store starting time for solve
     double t_start = TimingHelpers::timer();
  
     // backsub
     backsub(rhs, result);
  
     // Doc time for solve
     double t_end = TimingHelpers::timer();
     Solution_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info << "Time for SuperLUSolver solve (ndof=" << rhs.nrow() << "): "
                  << TimingHelpers::convert_secs_to_formatted_string(t_end -
                                                                     t_start)
                  << std::endl;
     }
   }
  
  
   //===============================================================
   /// Resolve the (transposed) system for a given RHS
   //===============================================================
   void SuperLUSolver::resolve_transpose(const DoubleVector& rhs,
                                         DoubleVector& result)
   {
     // Store starting time for solve
     double t_start = TimingHelpers::timer();
  
     // Backsub (but solve the transposed system)
     backsub_transpose(rhs, result);
  
     // Doc time for solve
     double t_end = TimingHelpers::timer();
     Solution_time = t_end - t_start;
     if (Doc_time)
     {
       oomph_info << "Time for SuperLUSolver solve (ndof=" << rhs.nrow() << "): "
                  << TimingHelpers::convert_secs_to_formatted_string(t_end -
                                                                     t_start)
                  << std::endl;
     }
   }
  
  
   //===================================================================
   /// LU decompose the matrix addressed by matrix_pt by using
   /// the SuperLU solver. The resulting matrix factors are stored
   /// internally.
   //===================================================================
   void SuperLUSolver::factorise(DoubleMatrixBase* const& matrix_pt)
   {
     // wipe memory
     this->clean_up_memory();
  
     // if we have mpi and the solver is distributed or default and nproc
     // gt 1
 #ifdef OOMPH_HAS_MPI
     DistributableLinearAlgebraObject* dist_matrix_pt =
       dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
     unsigned nproc = 1;
     if (dist_matrix_pt != 0)
     {
       nproc = dist_matrix_pt->distribution_pt()->communicator_pt()->nproc();
     }
     if (Solver_type == Distributed || (Solver_type == Default && nproc > 1 &&
                                        MPI_Helpers::mpi_has_been_initialised()))
     {
       // if the matrix is a distributed linear algebra object then use SuperLU
       // dist
       if (dist_matrix_pt != 0)
       {
         factorise_distributed(matrix_pt);
         Using_dist = true;
       }
       else
       {
         factorise_serial(matrix_pt);
         Using_dist = false;
       }
     }
     else
 #endif
     {
       factorise_serial(matrix_pt);
       Using_dist = false;
     }
   }
  
 #ifdef OOMPH_HAS_MPI
   //=============================================================================
   /// LU decompose the matrix addressed by matrix_pt using
   /// the SuperLU_DIST solver. The resulting matrix factors are stored
   /// internally.
   //=============================================================================
   void SuperLUSolver::factorise_distributed(DoubleMatrixBase* const& matrix_pt)
   {
     // Check that we have a square matrix
 #ifdef PARANOID
     int m = matrix_pt->ncol();
     int n = matrix_pt->nrow();
     if (n != m)
     {
       std::ostringstream error_message_stream;
       error_message_stream << "Can only solve for square matrices\n"
                            << "N, M " << n << " " << m << std::endl;
  
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // number of processors
     unsigned nproc = MPI_Helpers::communicator_pt()->nproc();
     if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
     {
       nproc = dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
                 ->distribution_pt()
                 ->communicator_pt()
                 ->nproc();
     }
  
     // Find number of rows and columns for the process grid
     // First guess at number of rows:
     int nprow = int(sqrt(double(nproc)));
  
     // Does this evenly divide the processor grid?
     while (nprow > 1)
     {
       if (nproc % nprow == 0) break;
       nprow -= 1;
     }
  
     // Store Number of rows/columns for process grid
     Dist_nprow = nprow;
     Dist_npcol = nproc / Dist_nprow;
  
     // Make sure any existing factors are deleted
     clean_up_memory();
  
     // Doc (0/1) = (true/false)
     int doc = !Doc_stats;
  
     // Rset Info
     Dist_info = 0;
  
     // Flag for row and column permutations
     int allow_permutations = Dist_allow_row_and_col_permutations;
  
     // Is it a DistributedCRDoubleMatrix?
     if (dynamic_cast<CRDoubleMatrix*>(matrix_pt) != 0)
     {
       // Get a cast pointer to the matrix
       CRDoubleMatrix* cr_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
  
       // Get the distribution from the matrix
       this->build_distribution(cr_matrix_pt->distribution_pt());
  
 #ifdef PARANOID
       // paranoid check that the matrix has been setup
       if (!cr_matrix_pt->built())
       {
         throw OomphLibError(
           "To apply SuperLUSolver to a CRDoubleMatrix - it must be built",
           OOMPH_CURRENT_FUNCTION,
           OOMPH_EXCEPTION_LOCATION);
       }
 #endif
  
       // if the matrix is distributed then setup setup superlu dist distributed
       if (cr_matrix_pt->distributed())
       {
         // Find the number of non-zero entries in the matrix
         const int nnz_local = int(cr_matrix_pt->nnz());
  
         // Set up the pointers to the matrix.
         // NOTE: these arrays (accessed via value_pt, index_pt and
         // start_pt) may be modified by the SuperLU_DIST routines, and so
         // a copy must be taken if the matrix is to be preserved.
  
         // Copy values
         Dist_value_pt = new double[nnz_local];
         double* matrix_value_pt = cr_matrix_pt->value();
         for (int i = 0; i < nnz_local; i++)
         {
           Dist_value_pt[i] = matrix_value_pt[i];
         }
  
         // Copy column indices
         Dist_index_pt = new int[nnz_local];
         int* matrix_index_pt = cr_matrix_pt->column_index();
         for (int i = 0; i < nnz_local; i++)
         {
           Dist_index_pt[i] = matrix_index_pt[i];
         }
  
         // Copy row starts
         int nrow_local = cr_matrix_pt->nrow_local();
         Dist_start_pt = new int[nrow_local + 1];
         int* matrix_start_pt = cr_matrix_pt->row_start();
         for (int i = 0; i <= nrow_local; i++)
         {
           Dist_start_pt[i] = matrix_start_pt[i];
         }
  
         // cache
         int ndof = cr_matrix_pt->distribution_pt()->nrow();
         int first_row = cr_matrix_pt->first_row();
  
         // Now delete the matrix if we are allowed
         if (Dist_delete_matrix_data == true)
         {
           cr_matrix_pt->clear();
         }
  
         // Factorize
         superlu_dist_distributed_matrix(
           1,
           allow_permutations,
           ndof,
           nnz_local,
           nrow_local,
           first_row,
           Dist_value_pt,
           Dist_index_pt,
           Dist_start_pt,
           0,
           Dist_nprow,
           Dist_npcol,
           doc,
           &Dist_solver_data_pt,
           &Dist_info,
           this->distribution_pt()->communicator_pt()->mpi_comm());
  
         // Record that data is stored
         Dist_distributed_solve_data_allocated = true;
       }
       // else the CRDoubleMatrix is not distributed
       else
       {
         // Find the number of non-zero entries in the matrix
         const int nnz = int(cr_matrix_pt->nnz());
  
         // cache the number of rows
         int nrow = cr_matrix_pt->nrow();
  
         // Set up the pointers to the matrix.
         // NOTE: these arrays (accessed via value_pt, index_pt and
         // start_pt) may be modified by the SuperLU_DIST routines, and so
         // a copy must be taken if the matrix is to be preserved.
  
         // create the corresponing cc matrix
         superlu_cr_to_cc(nrow,
                          nrow,
                          nnz,
                          cr_matrix_pt->value(),
                          cr_matrix_pt->column_index(),
                          cr_matrix_pt->row_start(),
                          &Dist_value_pt,
                          &Dist_index_pt,
                          &Dist_start_pt);
  
         // Delete the matrix if we are allowed
         if (Dist_delete_matrix_data == true)
         {
           cr_matrix_pt->clear();
         }
  
         // do the factorization
         superlu_dist_global_matrix(
           1,
           allow_permutations,
           nrow,
           nnz,
           Dist_value_pt,
           Dist_index_pt,
           Dist_start_pt,
           0,
           Dist_nprow,
           Dist_npcol,
           doc,
           &Dist_solver_data_pt,
           &Dist_info,
           this->distribution_pt()->communicator_pt()->mpi_comm());
  
         // Record that data is stored
         Dist_global_solve_data_allocated = true;
       }
     }
  
     // Or is it a CCDoubleMatrix?
     else if (dynamic_cast<CCDoubleMatrix*>(matrix_pt))
     {
       // Get a cast pointer to the matrix
       CCDoubleMatrix* serial_matrix_pt =
         dynamic_cast<CCDoubleMatrix*>(matrix_pt);
  
       // Find the number of non-zero entries in the matrix
       const int nnz = int(serial_matrix_pt->nnz());
  
       // Find # of degrees of freedom (variables)
       int ndof = int(serial_matrix_pt->nrow());
  
       // Find the local number of degrees of freedom in the linear system
       int ndof_local = ndof;
  
       // Set up the pointers to the matrix.
       // NOTE: these arrays (accessed via value_pt, index_pt and
       // start_pt) may be modified by the SuperLU_DIST routines, and so
       // a copy must be taken if the matrix is to be preserved.
  
       // Copy values
       Dist_value_pt = new double[nnz];
       double* matrix_value_pt = serial_matrix_pt->value();
       for (int i = 0; i < nnz; i++)
       {
         Dist_value_pt[i] = matrix_value_pt[i];
       }
  
       // copy row indices
       Dist_index_pt = new int[nnz];
       int* matrix_index_pt = serial_matrix_pt->row_index();
       for (int i = 0; i < nnz; i++)
       {
         Dist_index_pt[i] = matrix_index_pt[i];
       }
  
       // copy column starts
       Dist_start_pt = new int[ndof_local + 1];
       int* matrix_start_pt = serial_matrix_pt->column_start();
       for (int i = 0; i <= ndof_local; i++)
       {
         Dist_start_pt[i] = matrix_start_pt[i];
       }
  
       // Delete the matrix if we are allowed
       if (Dist_delete_matrix_data == true)
       {
         serial_matrix_pt->clean_up_memory();
       }
  
       // do the factorization
       superlu_dist_global_matrix(
         1,
         allow_permutations,
         ndof,
         nnz,
         Dist_value_pt,
         Dist_index_pt,
         Dist_start_pt,
         0,
         Dist_nprow,
         Dist_npcol,
         doc,
         &Dist_solver_data_pt,
         &Dist_info,
         this->distribution_pt()->communicator_pt()->mpi_comm());
  
       // Record that data is stored
       Dist_global_solve_data_allocated = true;
     }
     // Otherwise throw an error
     else
     {
       std::ostringstream error_message_stream;
       error_message_stream << "SuperLUSolver implemented only for "
                            << " CCDoubleMatrix, CRDoubleMatrix\n"
                            << "and DistributedCRDoubleMatrix matrices\n";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // Throw an error if superLU returned an error status in info.
     if (Dist_info != 0)
     {
       std::ostringstream error_msg;
       error_msg << "SuperLU returned the error status code " << Dist_info
                 << " . See the SuperLU documentation for what this means.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
   }
 #endif
  
   //===================================================================
   /// LU decompose the matrix addressed by matrix_pt by using
   /// the SuperLU solver. The resulting matrix factors are stored
   /// internally.
   //===================================================================
   void SuperLUSolver::factorise_serial(DoubleMatrixBase* const& matrix_pt)
   {
 #ifdef PARANOID
     // PARANOID check that if the matrix is distributable then it should not be
     // then it should not be distributed
     if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
     {
       if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt)
             ->distributed())
       {
         std::ostringstream error_message_stream;
         error_message_stream << "The matrix must not be distributed.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // Find # of degrees of freedom (variables)
     int n = matrix_pt->nrow();
  
     // Check that we have a square matrix
 #ifdef PARANOID
     int m = matrix_pt->ncol();
     if (n != m)
     {
       std::ostringstream error_message_stream;
       error_message_stream << "Can only solve for square matrices\n"
                            << "N, M " << n << " " << m << std::endl;
  
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // Storage for the values, rows and column indices
     // required by SuplerLU
     double* value = 0;
     int *index = 0, *start = 0;
  
     // Integer used to represent compressed row or column format
     // Default compressed row
     int transpose = 0;
  
     // Number of non-zero entries in the matrix
     int nnz = 0;
  
     // Doc flag (convert to int for SuperLU)
     int doc = Doc_stats;
  
     // Is it a CR matrix
     if (dynamic_cast<CRDoubleMatrix*>(matrix_pt))
     {
       // Set the appropriate row flags
       Serial_compressed_row_flag = true;
       transpose = 1;
       // Get a cast pointer to the matrix
       CRDoubleMatrix* CR_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
  
       // Now set the pointers to the interanally stored values
       // and indices
       nnz = CR_matrix_pt->nnz();
       value = CR_matrix_pt->value();
       index = CR_matrix_pt->column_index();
       start = CR_matrix_pt->row_start();
     }
     // Otherwise is it the compressed column version?
     else if (dynamic_cast<CCDoubleMatrix*>(matrix_pt))
     {
       // Set the compressed row flag to false
       Serial_compressed_row_flag = false;
       // Get a cast pointer to the matrix
       CCDoubleMatrix* CC_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
  
       // Now set the pointers to the interanally stored values
       // and indices
       nnz = CC_matrix_pt->nnz();
       value = CC_matrix_pt->value();
       index = CC_matrix_pt->row_index();
       start = CC_matrix_pt->column_start();
     }
     // Otherwise throw and error
     else
     {
       throw OomphLibError("SuperLU only works with CR or CC Double matrices",
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // Clean up any previous storage so that if this is called twice with
     // the same matrix, we don't get a memory leak
     clean_up_memory();
  
     // Perform the lu decompose phase (i=1)
     int i = 1;
     Serial_sign_of_determinant_of_matrix = superlu(&i,
                                                    &n,
                                                    &nnz,
                                                    0,
                                                    value,
                                                    index,
                                                    start,
                                                    0,
                                                    &n,
                                                    &transpose,
                                                    &doc,
                                                    &Serial_f_factors,
                                                    &Serial_info);
  
     // Throw an error if superLU returned an error status in info.
     if (Serial_info != 0)
     {
       std::ostringstream error_msg;
       error_msg << "SuperLU returned the error status code " << Serial_info
                 << " . See the SuperLU documentation for what this means.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
  
  
     // Set the number of degrees of freedom in the linear system
     Serial_n_dof = n;
   }
  
   //=============================================================================
   /// Do the backsubstitution for SuperLUSolver.
   /// Note - this method performs no paranoid checks - these are all performed
   /// in solve(...) and resolve(...)
   //=============================================================================
   void SuperLUSolver::backsub(const DoubleVector& rhs, DoubleVector& result)
   {
 #ifdef OOMPH_HAS_MPI
     if (Using_dist)
     {
       backsub_distributed(rhs, result);
     }
     else
 #endif
     {
       backsub_serial(rhs, result);
     }
   }
  
  
   //=============================================================================
   /// Do the backsubstitution of the transposed system for SuperLUSolver.
   /// Note - this method performs no paranoid checks - these are all performed
   /// in solve(...) and resolve(...)
   //=============================================================================
   void SuperLUSolver::backsub_transpose(const DoubleVector& rhs,
                                         DoubleVector& result)
   {
 #ifdef OOMPH_HAS_MPI
     if (Using_dist)
     {
       backsub_transpose_distributed(rhs, result);
     }
     else
 #endif
     {
       backsub_transpose_serial(rhs, result);
     }
   }
  
 #ifdef OOMPH_HAS_MPI
   //=========================================================================
   /// Static flag: if true, backsub_distributed(...) does not issue the
   /// warning about the distribution of the RHS vector differing from that
   /// of the solver (the RHS is redistributed regardless of this flag).
   /// Default is false.
   //=========================================================================
   bool SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_resolve =
     false;
  
   //=============================================================================
   /// Do the backsubstitution for SuperLU solver.
   /// Note - this method performs no paranoid checks - these are all performed
   /// in solve(...) and resolve(...)
   //=============================================================================
   void SuperLUSolver::backsub_distributed(const DoubleVector& rhs,
                                           DoubleVector& result)
   {
 #ifdef PARANOID
     // check that the rhs vector is setup
     if (!rhs.distribution_pt()->built())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The vectors rhs must be setup";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
 #endif
     // check that the rhs distribution is the same as the distribution as this
     // solver. If not redistribute and issue a warning
     LinearAlgebraDistribution rhs_distribution(rhs.distribution_pt());
     if (!(*rhs.distribution_pt() == *this->distribution_pt()))
     {
       if (!Suppress_incorrect_rhs_distribution_warning_in_resolve)
       {
         std::ostringstream warning_stream;
         warning_stream << "The distribution of rhs vector does not match that "
                           "ofthe solver.\n";
         warning_stream << "The rhs will be redistributed, which is likely to  "
                           "be inefficient\n";
         warning_stream
           << "To remove this warning you can either:\n"
           << "    i) Ensure that the rhs vector has the correct distribution\n"
           << "       before calling the resolve() function\n"
           << "or ii) Set the flag \n"
           << " SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_"
              "resolve\n"
           << "       to be true\n\n";
  
         OomphLibWarning(warning_stream.str(),
                         "SuperLUSolver::resolve()",
                         OOMPH_EXCEPTION_LOCATION);
       }
  
       // Have to cast away const-ness (which tells us that we shouldn't really
       // be doing this!)
       const_cast<DoubleVector&>(rhs).redistribute(this->distribution_pt());
     }
  
 #ifdef PARANOID
     // if the result vector is setup then check it has the same distribution
     // as the rhs
     if (result.distribution_built())
     {
       if (!(*result.distribution_pt() == *rhs.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream
           << "The result vector distribution has been setup; it must have the "
           << "same distribution as the rhs vector.";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
     // Doc (0/1) = (true/false)
     int doc = !Doc_stats;
  
     // Reset Info
     Dist_info = 0;
  
     // number of DOFs
     int ndof = this->distribution_pt()->nrow();
  
     // Copy the rhs values to result
     result = rhs;
  
     // Do the backsubsitition phase
     if (Dist_distributed_solve_data_allocated)
     {
       // Call distributed solver
       superlu_dist_distributed_matrix(
         2,
         -1,
         ndof,
         0,
         0,
         0,
         0,
         0,
         0,
         result.values_pt(),
         Dist_nprow,
         Dist_npcol,
         doc,
         &Dist_solver_data_pt,
         &Dist_info,
         this->distribution_pt()->communicator_pt()->mpi_comm());
     }
     else if (Dist_global_solve_data_allocated)
     {
       // Call global solver
       superlu_dist_global_matrix(
         2,
         -1,
         ndof,
         0,
         0,
         0,
         0,
         result.values_pt(),
         Dist_nprow,
         Dist_npcol,
         doc,
         &Dist_solver_data_pt,
         &Dist_info,
         this->distribution_pt()->communicator_pt()->mpi_comm());
     }
     else
     {
       throw OomphLibError("The matrix factors have not been stored",
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
  
     // Throw an error if superLU returned an error status in info.
     if (Dist_info != 0)
     {
       std::ostringstream error_msg;
       error_msg << "SuperLU returned the error status code " << Dist_info
                 << " . See the SuperLU documentation for what this means.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
  
     // Redistribute to original distribution
     // Have to cast away const-ness (which tells us that we shouldn't really
     // be doing this!)
     const_cast<DoubleVector&>(rhs).redistribute(&rhs_distribution);
   }
  
   //=============================================================================
   /// Do the backsubstitution for SuperLU solver.
   /// Note - this method performs no paranoid checks - these are all performed
   /// in solve(...) and resolve(...)
   //=============================================================================
   void SuperLUSolver::backsub_transpose_distributed(const DoubleVector& rhs,
                                                     DoubleVector& result)
   {
     // Create an output stream
     std::ostringstream error_message_stream;
  
     // Create the error message
     error_message_stream << "This function hasn't been implemented yet. If you "
                          << "need it, implement it!" << std::endl;
  
     // Throw the error message
     throw OomphLibError(error_message_stream.str(),
                         OOMPH_CURRENT_FUNCTION,
                         OOMPH_EXCEPTION_LOCATION);
   }
 #endif
  
   //================================================================
   /// Do the backsubstitution for SuperLU
   //================================================================
   void SuperLUSolver::backsub_serial(const DoubleVector& rhs,
                                      DoubleVector& result)
   {
     // Find the number of unknowns
     int n = rhs.nrow();
  
 #ifdef PARANOID
     // PARANOID check that this rhs distribution is setup
     if (!rhs.built())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The rhs vector distribution must be setup.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the rhs has the right number of global rows
     if (static_cast<int>(Serial_n_dof) != n)
     {
       throw OomphLibError(
         "RHS does not have the same dimension as the linear system",
         OOMPH_CURRENT_FUNCTION,
         OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the rhs is not distributed
     if (rhs.distribution_pt()->distributed())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The rhs vector must not be distributed.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that if the result is setup it matches the distribution
     // of the rhs
     if (result.built())
     {
       if (!(*rhs.distribution_pt() == *result.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream << "If the result distribution is setup then it "
                                 "must be the same as the "
                              << "rhs distribution";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // copy result to rhs
     result = rhs;
  
     // Number of RHSs
     int nrhs = 1;
  
     // Cast the boolean flags to ints for SuperLU
     int transpose = Serial_compressed_row_flag;
     int doc = Doc_stats;
  
     // Do the backsubsitition phase
     int i = 2;
     superlu(&i,
             &n,
             0,
             &nrhs,
             0,
             0,
             0,
             result.values_pt(),
             &n,
             &transpose,
             &doc,
             &Serial_f_factors,
             &Serial_info);
  
     // Throw an error if superLU returned an error status in info.
     if (Serial_info != 0)
     {
       std::ostringstream error_msg;
       error_msg << "SuperLU returned the error status code " << Serial_info
                 << " . See the SuperLU documentation for what this means.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
   }
  
   //================================================================
   /// Do the backsubstitution for SuperLU
   //================================================================
   void SuperLUSolver::backsub_transpose_serial(const DoubleVector& rhs,
                                                DoubleVector& result)
   {
     // Find the number of unknowns
     int n = rhs.nrow();
  
 #ifdef PARANOID
     // PARANOID check that this rhs distribution is setup
     if (!rhs.built())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The rhs vector distribution must be setup.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the rhs has the right number of global rows
     if (static_cast<int>(Serial_n_dof) != n)
     {
       throw OomphLibError(
         "RHS does not have the same dimension as the linear system",
         OOMPH_CURRENT_FUNCTION,
         OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the rhs is not distributed
     if (rhs.distribution_pt()->distributed())
     {
       std::ostringstream error_message_stream;
       error_message_stream << "The rhs vector must not be distributed.";
       throw OomphLibError(error_message_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that if the result is setup it matches the distribution
     // of the rhs
     if (result.built())
     {
       if (!(*rhs.distribution_pt() == *result.distribution_pt()))
       {
         std::ostringstream error_message_stream;
         error_message_stream << "If the result distribution is setup then it "
                                 "must be the same as the "
                              << "rhs distribution";
         throw OomphLibError(error_message_stream.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
     }
 #endif
  
     // copy result to rhs
     result = rhs;
  
     // Number of RHSs
     int nrhs = 1;
  
     // Cast the boolean flags to ints for SuperLU
     int transpose = (!Serial_compressed_row_flag);
     int doc = Doc_stats;
  
     // Do the backsubsitition phase
     int i = 2;
     superlu(&i,
             &n,
             0,
             &nrhs,
             0,
             0,
             0,
             result.values_pt(),
             &n,
             &transpose,
             &doc,
             &Serial_f_factors,
             &Serial_info);
  
     // Throw an error if superLU returned an error status in info.
     if (Serial_info != 0)
     {
       std::ostringstream error_msg;
       error_msg << "SuperLU returned the error status code " << Serial_info
                 << " . See the SuperLU documentation for what this means.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
   }
  
   //=============================================================================
   /// Clean up the memory
   //=============================================================================
   void SuperLUSolver::clean_up_memory()
   {
     // If we have non-zero LU factors stored
     if (Serial_f_factors != 0)
     {
       // Clean up those factors
       int i = 3;
       int transpose = Serial_compressed_row_flag;
       superlu(&i,
               0,
               0,
               0,
               0,
               0,
               0,
               0,
               0,
               &transpose,
               0,
               &Serial_f_factors,
               &Serial_info);
  
       // Set the F_factors to zero
       Serial_f_factors = 0;
       Serial_n_dof = 0;
     }
  
 #ifdef OOMPH_HAS_MPI
     // If we have non-zero LU factors stored
     if (Dist_solver_data_pt != 0)
     {
       // Clean up any stored solver data
  
       // Doc (0/1) = (true/false)
       int doc = !Doc_stats;
  
       // Reset Info flag
       Dist_info = 0;
  
       // number of DOFs
       int ndof = this->distribution_pt()->nrow();
  
       if (Dist_distributed_solve_data_allocated)
       {
         superlu_dist_distributed_matrix(
           3,
           -1,
           ndof,
           0,
           0,
           0,
           0,
           0,
           0,
           0,
           Dist_nprow,
           Dist_npcol,
           doc,
           &Dist_solver_data_pt,
           &Dist_info,
           this->distribution_pt()->communicator_pt()->mpi_comm());
         Dist_distributed_solve_data_allocated = false;
       }
       if (Dist_global_solve_data_allocated)
       {
         superlu_dist_global_matrix(
           3,
           -1,
           ndof,
           0,
           0,
           0,
           0,
           0,
           Dist_nprow,
           Dist_npcol,
           doc,
           &Dist_solver_data_pt,
           &Dist_info,
           this->distribution_pt()->communicator_pt()->mpi_comm());
         Dist_global_solve_data_allocated = false;
       }
  
       Dist_solver_data_pt = 0;
  
       // Delete internal copy of the matrix
       delete[] Dist_value_pt;
       delete[] Dist_index_pt;
       delete[] Dist_start_pt;
       Dist_value_pt = 0;
       Dist_index_pt = 0;
       Dist_start_pt = 0;
  
       // and the distribution
       this->clear_distribution();
     }
 #endif
   }
  
 } // namespace oomph