oomph-lib: double_vector.cc Source File

Go to the documentation of this file.
 // LIC// ====================================================================
 // LIC// This file forms part of oomph-lib, the object-oriented,
 // LIC// multi-physics finite-element library, available
 // LIC// at http://www.oomph-lib.org.
 // LIC//
 // LIC// Copyright (C) 2006-2023 Matthias Heil and Andrew Hazel
 // LIC//
 // LIC// This library is free software; you can redistribute it and/or
 // LIC// modify it under the terms of the GNU Lesser General Public
 // LIC// License as published by the Free Software Foundation; either
 // LIC// version 2.1 of the License, or (at your option) any later version.
 // LIC//
 // LIC// This library is distributed in the hope that it will be useful,
 // LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
 // LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // LIC// Lesser General Public License for more details.
 // LIC//
 // LIC// You should have received a copy of the GNU Lesser General Public
 // LIC// License along with this library; if not, write to the Free Software
 // LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 // LIC// 02110-1301  USA.
 // LIC//
 // LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
 // LIC//
 // LIC//====================================================================
 #include "double_vector.h"
 #include "matrices.h"
  
  
 namespace oomph
 {
   //============================================================================
   /// Just copies the argument DoubleVector
   //============================================================================
   void DoubleVector::build(const DoubleVector& old_vector)
   {
     if (!(*this == old_vector))
     {
       // the vector owns the internal data
       Internal_values = true;
  
       // reset the distribution and resize the data
       this->build(old_vector.distribution_pt(), 0.0);
  
       // copy the data
       if (this->distribution_built())
       {
         unsigned nrow_local = this->nrow_local();
         const double* old_vector_values = old_vector.values_pt();
         std::copy(old_vector_values, old_vector_values + nrow_local, Values_pt);
       }
     }
   }
  
   //============================================================================
   /// Assembles a DoubleVector with distribution dist, if v is specified
   /// each row is set to v
   //============================================================================
   void DoubleVector::build(const LinearAlgebraDistribution* const& dist_pt,
                            const double& v)
   {
     // clean the memory
     this->clear();
  
     // the vector owns the internal data
     Internal_values = true;
  
     // Set the distribution
     this->build_distribution(dist_pt);
  
     // update the values
     if (dist_pt->built())
     {
       unsigned nrow_local = this->nrow_local();
       Values_pt = new double[nrow_local];
  
       std::fill_n(Values_pt, nrow_local, v);
       Built = true;
     }
     else
     {
       Built = false;
     }
   }
  
   //============================================================================
   /// Assembles a DoubleVector with a distribution dist and coefficients
   /// taken from the vector v.
   /// Note. The vector v MUST be of length nrow()
   //============================================================================
   void DoubleVector::build(const LinearAlgebraDistribution* const& dist_pt,
                            const Vector<double>& v)
   {
     // clean the memory
     this->clear();
  
     // the vector owns the internal data
     Internal_values = true;
  
     // Set the distribution
     this->build_distribution(dist_pt);
  
     // update the values
     if (dist_pt->built())
     {
       // re-allocate memory which was deleted by clear()
       unsigned nrow_local = this->nrow_local();
       Values_pt = new double[nrow_local];
  
       // use the initialise method to populate the vector
       this->initialise(v);
       Built = true;
     }
     else
     {
       Built = false;
     }
   }
  
   //============================================================================
   /// initialise the whole vector with value v
   //============================================================================
   void DoubleVector::initialise(const double& v)
   {
     if (Built)
     {
       // cache nrow local
       unsigned nrow_local = this->nrow_local();
  
       std::fill_n(Values_pt, nrow_local, v);
     }
   }
  
   //============================================================================
   /// initialise the vector with coefficient from the vector v.
   /// Note: The vector v must be of length nrow().
   //============================================================================
   void DoubleVector::initialise(const Vector<double> v)
   {
 #ifdef PARANOID
     if (v.size() != this->nrow())
     {
       std::ostringstream error_message;
       error_message << "The vector passed to initialise(...) must be of length "
                     << "nrow()";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
     unsigned begin_first_row = this->first_row();
     unsigned end = begin_first_row + this->nrow_local();
  
     std::copy(v.begin() + begin_first_row, v.begin() + end, Values_pt);
   }
  
   //============================================================================
   /// The contents of the vector are redistributed to match the new
   /// distribution. In a non-MPI build this method works, but does nothing.
   /// \b NOTE 1: The current distribution and the new distribution must have
   /// the same number of global rows.
   /// \b NOTE 2: The current distribution and the new distribution must have
   /// the same Communicator.
   //============================================================================
   void DoubleVector::redistribute(
     const LinearAlgebraDistribution* const& dist_pt)
   {
     // Everything below is only compiled when OOMPH_HAS_MPI is defined;
     // otherwise the body of this function is empty.
 #ifdef OOMPH_HAS_MPI
 #ifdef PARANOID
     if (!Internal_values)
     {
       // if this vector does not own the double* values then it cannot be
       // redistributed.
       // note: this is not strictly necessary - would just need to be careful
       // with delete[] below.
       std::ostringstream error_message;
       error_message << "This vector does not own its data (i.e. it has been "
                     << "passed in via set_external_values() and therefore "
                     << "cannot be redistributed";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // paranoid check that the number of global rows is the same for both
     // distributions
     if (dist_pt->nrow() != this->nrow())
     {
       std::ostringstream error_message;
       error_message << "The number of global rows in the new distribution ("
                     << dist_pt->nrow() << ") is not equal to the number"
                     << " of global rows in the current distribution ("
                     << this->nrow() << ").\n";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // paranoid check that the current distribution and the new distribution
     // have the same Communicator
     OomphCommunicator temp_comm(*dist_pt->communicator_pt());
     if (!(temp_comm == *this->distribution_pt()->communicator_pt()))
     {
       std::ostringstream error_message;
       error_message << "The new distribution and the current distribution must "
                     << "have the same communicator.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif

     // only do any work if the new distribution differs from the current one
     if (!((*this->distribution_pt()) == *dist_pt))
     {
       // get the rank and the number of processors
       int my_rank = this->distribution_pt()->communicator_pt()->my_rank();
       int nproc = this->distribution_pt()->communicator_pt()->nproc();

       // Case 1: both the current and the new distribution are distributed
       if (this->distributed() && dist_pt->distributed())
       {
         // first_row and nrow_local of every processor, for both the new
         // and the current distribution
         Vector<unsigned> new_first_row_data(nproc);
         Vector<unsigned> new_nrow_local_data(nproc);
         Vector<unsigned> current_first_row_data(nproc);
         Vector<unsigned> current_nrow_local_data(nproc);
         for (int i = 0; i < nproc; i++)
         {
           new_first_row_data[i] = dist_pt->first_row(i);
           new_nrow_local_data[i] = dist_pt->nrow_local(i);
           current_first_row_data[i] = this->first_row(i);
           current_nrow_local_data[i] = this->nrow_local(i);
         }

         // compute which local rows are expected to be received from each
         // processor / sent to each processor:
         // *_for_proc[p]  describes the rows sent from this processor to p,
         // *_from_proc[p] describes the rows received by this processor from p
         Vector<unsigned> new_first_row_for_proc(nproc);
         Vector<unsigned> new_nrow_local_for_proc(nproc);
         Vector<unsigned> new_first_row_from_proc(nproc);
         Vector<unsigned> new_nrow_local_from_proc(nproc);

         // for every processor compute the first_row and nrow_local that
         // will be sent and received by this processor
         for (int p = 0; p < nproc; p++)
         {
           // start with data to be sent: the overlap (if any) between the
           // rows currently stored here and the rows p holds in the new
           // distribution
           if ((new_first_row_data[p] < (current_first_row_data[my_rank] +
                                         current_nrow_local_data[my_rank])) &&
               (current_first_row_data[my_rank] <
                (new_first_row_data[p] + new_nrow_local_data[p])))
           {
             new_first_row_for_proc[p] =
               std::max(current_first_row_data[my_rank], new_first_row_data[p]);
             new_nrow_local_for_proc[p] =
               std::min((current_first_row_data[my_rank] +
                         current_nrow_local_data[my_rank]),
                        (new_first_row_data[p] + new_nrow_local_data[p])) -
               new_first_row_for_proc[p];
           }

           // and data to be received: the overlap (if any) between the rows
           // p currently stores and the rows this processor holds in the new
           // distribution
           if ((new_first_row_data[my_rank] <
                (current_first_row_data[p] + current_nrow_local_data[p])) &&
               (current_first_row_data[p] <
                (new_first_row_data[my_rank] + new_nrow_local_data[my_rank])))
           {
             new_first_row_from_proc[p] =
               std::max(current_first_row_data[p], new_first_row_data[my_rank]);
             new_nrow_local_from_proc[p] =
               std::min(
                 (current_first_row_data[p] + current_nrow_local_data[p]),
                 (new_first_row_data[my_rank] + new_nrow_local_data[my_rank])) -
               new_first_row_from_proc[p];
           }
         }

         // temporary storage for the new data
         double* temp_data = new double[new_nrow_local_data[my_rank]];

         // "send to self": copy the data that does not need to be sent
         // elsewhere straight into temp_data
         if (new_nrow_local_for_proc[my_rank] != 0)
         {
           // j: offset of the first retained row in the current local storage
           unsigned j =
             new_first_row_for_proc[my_rank] - current_first_row_data[my_rank];
           // k: offset of the same row in the new local storage
           unsigned k =
             new_first_row_for_proc[my_rank] - new_first_row_data[my_rank];
           for (unsigned i = 0; i < new_nrow_local_for_proc[my_rank]; i++)
           {
             temp_data[k + i] = Values_pt[j + i];
           }
         }

         // send and receive circularly: in step p this processor sends to
         // rank (my_rank + p) and receives from rank (my_rank - p), both
         // modulo nproc
         for (int p = 1; p < nproc; p++)
         {
           // next processor to send to
           unsigned dest_p = (my_rank + p) % nproc;

           // next processor to receive from
           unsigned source_p = (nproc + my_rank - p) % nproc;

           // send and receive the values; the buffer pointers are offset so
           // that global row numbers map onto local storage positions
           MPI_Status status;
           MPI_Sendrecv(Values_pt + new_first_row_for_proc[dest_p] -
                          current_first_row_data[my_rank],
                        new_nrow_local_for_proc[dest_p],
                        MPI_DOUBLE,
                        dest_p,
                        1,
                        temp_data + new_first_row_from_proc[source_p] -
                          new_first_row_data[my_rank],
                        new_nrow_local_from_proc[source_p],
                        MPI_DOUBLE,
                        source_p,
                        1,
                        this->distribution_pt()->communicator_pt()->mpi_comm(),
                        &status);
         }

         // replace the old storage by freshly allocated storage of the new
         // local size and copy the contents of temp_data into it
         delete[] Values_pt;
         unsigned nrow_local = dist_pt->nrow_local();
         Values_pt = new double[nrow_local];
         for (unsigned i = 0; i < nrow_local; i++)
         {
           Values_pt[i] = temp_data[i];
         }
         delete[] temp_data;
       }

       // Case 2: this vector is distributed but the new distribution is
       // global (not distributed)
       else if (this->distributed() && !dist_pt->distributed())
       {
         // copy existing Values_pt to temp_data
         unsigned nrow_local = this->nrow_local();
         double* temp_data = new double[nrow_local];
         for (unsigned i = 0; i < nrow_local; i++)
         {
           temp_data[i] = Values_pt[i];
         }

         // clear and resize Values_pt so that it can hold all global rows
         delete[] Values_pt;
         Values_pt = new double[this->nrow()];

         // create int arrays of the first rows and local row counts of all
         // processors (used as displacements and receive counts below)
         int* dist_first_row = new int[nproc];
         int* dist_nrow_local = new int[nproc];
         for (int p = 0; p < nproc; p++)
         {
           dist_first_row[p] = this->first_row(p);
           dist_nrow_local[p] = this->nrow_local(p);
         }

         // gather the local vectors from all processors on all processors
         int my_nrow_local(this->nrow_local());
         MPI_Allgatherv(temp_data,
                        my_nrow_local,
                        MPI_DOUBLE,
                        Values_pt,
                        dist_nrow_local,
                        dist_first_row,
                        MPI_DOUBLE,
                        this->distribution_pt()->communicator_pt()->mpi_comm());

         // update the distribution
         this->build_distribution(dist_pt);

         // delete the temp_data
         delete[] temp_data;

         // clean up
         delete[] dist_first_row;
         delete[] dist_nrow_local;
       }

       // Case 3: this vector is not distributed but the new distribution is
       else if (!this->distributed() && dist_pt->distributed())
       {
         // cache the new nrow_local
         unsigned nrow_local = dist_pt->nrow_local();

         // and first_row
         unsigned first_row = dist_pt->first_row();

         // temp storage for the new data
         double* temp_data = new double[nrow_local];

         // copy the rows this processor keeps under the new distribution
         for (unsigned i = 0; i < nrow_local; i++)
         {
           temp_data[i] = Values_pt[first_row + i];
         }

         // hand the new storage over to Values_pt (no further copy needed)
         delete[] Values_pt;
         Values_pt = temp_data;

         // update the distribution
         this->build_distribution(dist_pt);
       }

       // copy the Distribution. This is the only update of the distribution
       // for Case 1 and for the case in which neither distribution is
       // distributed; Cases 2 and 3 have already called build_distribution()
       // above, so for them this call is repeated.
       this->build_distribution(dist_pt);
     }
 #endif
   }
  
   //============================================================================
   /// [] access function to the (local) values of this vector
   //============================================================================
   double& DoubleVector::operator[](int i)
   {
 #ifdef RANGE_CHECKING
     if (i >= int(this->nrow_local()))
     {
       std::ostringstream error_message;
       error_message << "Range Error: " << i << " is not in the range (0,"
                     << this->nrow_local() - 1 << ")";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
     return Values_pt[i];
   }
  
   //============================================================================
   /// == operator
   //============================================================================
   bool DoubleVector::operator==(const DoubleVector& v)
   {
     // if v is not setup return false
     if (v.built() && !this->built())
     {
       return false;
     }
     else if (!v.built() && this->built())
     {
       return false;
     }
     else if (!v.built() && !this->built())
     {
       return true;
     }
     else
     {
       const double* v_values_pt = v.values_pt();
       unsigned nrow_local = this->nrow_local();
       for (unsigned i = 0; i < nrow_local; i++)
       {
         if (Values_pt[i] != v_values_pt[i])
         {
           return false;
         }
       }
       return true;
     }
   }
  
   //============================================================================
   /// += operator
   //============================================================================
   void DoubleVector::operator+=(const DoubleVector& v)
   {
 #ifdef PARANOID
     // PARANOID check that this vector is setup
     if (!this->built())
     {
       std::ostringstream error_message;
       error_message << "This vector must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the vector v is setup
     if (!v.built())
     {
       std::ostringstream error_message;
       error_message << "The vector v must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the vectors have the same distribution
     if (!(*v.distribution_pt() == *this->distribution_pt()))
     {
       std::ostringstream error_message;
       error_message << "The vector v and this vector must have the same "
                     << "distribution.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif //
  
     // cache nrow_local
     double* v_values_pt = v.values_pt();
     unsigned nrow_local = this->nrow_local();
  
     // Decided to keep this as a loop rather than use std::transform, because
     // this is a very simple loop and should compile to the same code.
     for (unsigned i = 0; i < nrow_local; i++)
     {
       Values_pt[i] += v_values_pt[i];
     }
   }
  
   //============================================================================
   /// -= operator
   //============================================================================
   void DoubleVector::operator-=(const DoubleVector& v)
   {
 #ifdef PARANOID
     // PARANOID check that this vector is setup
     if (!this->distribution_built())
     {
       std::ostringstream error_message;
       error_message << "This vector must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the vector v is setup
     if (!v.built())
     {
       std::ostringstream error_message;
       error_message << "The vector v must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     // PARANOID check that the vectors have the same distribution
     if (!(*v.distribution_pt() == *this->distribution_pt()))
     {
       std::ostringstream error_message;
       error_message << "The vector v and this vector must have the same "
                     << "distribution.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // cache nrow_local
     double* v_values_pt = v.values_pt();
     unsigned nrow_local = this->nrow_local();
  
     // Decided to keep this as a loop rather than use std::transform, because
     // this is a very simple loop and should compile to the same code.
     for (unsigned i = 0; i < nrow_local; i++)
     {
       Values_pt[i] -= v_values_pt[i];
     }
   }
  
  
   //============================================================================
   /// Multiply by double
   //============================================================================
   void DoubleVector::operator*=(const double& d)
   {
 #ifdef PARANOID
     if (!this->distribution_built())
     {
       std::ostringstream error_msg;
       error_msg << "DoubleVector must be set up.";
       throw OomphLibError(
         error_msg.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // Decided to keep this as a loop rather than use std::transform, because
     // this is a very simple loop and should compile to the same code.
     for (unsigned i = 0, ni = this->nrow_local(); i < ni; i++)
     {
       Values_pt[i] *= d;
     }
   }
  
   //============================================================================
   /// Divide by double
   //============================================================================
   void DoubleVector::operator/=(const double& d)
   {
     // PARANOID checks are done inside operator *=
  
     // Decided to keep this as a loop rather than use std::transform, because
     // this is a very simple loop and should compile to the same code.
     double divisor = (1.0 / d);
     this->operator*=(divisor);
   }
  
   //============================================================================
   /// [] access function to the (local) values of this vector
   //============================================================================
   const double& DoubleVector::operator[](int i) const
   {
 #ifdef RANGE_CHECKING
     if (i >= int(this->nrow_local()))
     {
       std::ostringstream error_message;
       error_message << "Range Error: " << i << " is not in the range (0,"
                     << this->nrow_local() - 1 << ")";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
     return Values_pt[i];
   }
  
   //============================================================================
   /// returns the maximum coefficient (largest absolute value)
   //============================================================================
   double DoubleVector::max() const
   {
     // the number of local rows
     unsigned nrow = this->nrow_local();
  
     // get the local maximum
     double max = 0.0;
     for (unsigned i = 0; i < nrow; i++)
     {
       if (std::fabs(Values_pt[i]) > std::fabs(max))
       {
         max = std::fabs(Values_pt[i]);
       }
     }
  
     // now return the maximum
 #ifdef OOMPH_HAS_MPI
     // if this vector is not distributed then the local maximum is the global
     // maximum
     if (!this->distributed())
     {
       return max;
     }
     // else if the vector is distributed but only on a single processor
     // then the local maximum is the global maximum
     else if (this->distribution_pt()->communicator_pt()->nproc() == 1)
     {
       return max;
     }
     // otherwise use MPI_Allreduce to find the global maximum
     else
     {
       double local_max = max;
       MPI_Allreduce(&local_max,
                     &max,
                     1,
                     MPI_DOUBLE,
                     MPI_MAX,
                     this->distribution_pt()->communicator_pt()->mpi_comm());
       return max;
     }
 #else
     return max;
 #endif
   }
  
   //============================================================================
   /// output the contents of the vector
   //============================================================================
   void DoubleVector::output(std::ostream& outfile,
                             const int& output_precision) const
   {
     // temp pointer to values
     double* temp;
  
     // number of global row
     unsigned nrow = this->nrow();
  
 #ifdef OOMPH_HAS_MPI
  
     // number of local rows
     int nrow_local = this->nrow_local();
  
     // gather from all processors
     if (this->distributed() &&
         this->distribution_pt()->communicator_pt()->nproc() > 1)
     {
       // number of processors
       int nproc = this->distribution_pt()->communicator_pt()->nproc();
  
       // number of gobal row
       unsigned nrow = this->nrow();
  
       // get the vector of first_row s and nrow_local s
       int* dist_first_row = new int[nproc];
       int* dist_nrow_local = new int[nproc];
       for (int p = 0; p < nproc; p++)
       {
         dist_first_row[p] = this->first_row(p);
         dist_nrow_local[p] = this->nrow_local(p);
       }
  
       // gather
       temp = new double[nrow];
       MPI_Allgatherv(Values_pt,
                      nrow_local,
                      MPI_DOUBLE,
                      temp,
                      dist_nrow_local,
                      dist_first_row,
                      MPI_DOUBLE,
                      this->distribution_pt()->communicator_pt()->mpi_comm());
  
       // clean up
       delete[] dist_first_row;
       delete[] dist_nrow_local;
     }
     else
     {
       temp = Values_pt;
     }
 #else
     temp = Values_pt;
 #endif
  
     // output
     // Store the precision so we can revert it.
     std::streamsize old_precision = 0;
     if (output_precision > 0)
     {
       old_precision = outfile.precision();
       outfile << std::setprecision(output_precision);
     }
  
     for (unsigned i = 0; i < nrow; i++)
     {
       outfile << i << " " << temp[i] << std::endl;
     }
  
     // Revert the precision.
     if (output_precision > 0)
     {
       outfile << std::setprecision(old_precision);
     }
  
     // clean up if requires
 #ifdef OOMPH_HAS_MPI
     if (this->distributed() &&
         this->distribution_pt()->communicator_pt()->nproc() > 1)
     {
       delete[] temp;
     }
 #endif
   }
  
   //============================================================================
   /// output the local contents of the vector
   //============================================================================
   void DoubleVector::output_local_values(std::ostream& outfile,
                                          const int& output_precision) const
   {
     // Number of local rows.
     unsigned nrow_local = this->nrow_local();
  
     // output
     // Store the precision so we can revert it.
     std::streamsize old_precision = 0;
     if (output_precision > 0)
     {
       old_precision = outfile.precision();
       outfile << std::setprecision(output_precision);
     }
  
     for (unsigned i = 0; i < nrow_local; i++)
     {
       outfile << i << " " << Values_pt[i] << std::endl;
     }
  
     // Revert the precision.
     if (output_precision > 0)
     {
       outfile << std::setprecision(old_precision);
     }
   }
  
   //============================================================================
   /// output the local contents of the vector with the first row offset.
   //============================================================================
   void DoubleVector::output_local_values_with_offset(
     std::ostream& outfile, const int& output_precision) const
   {
     // Number of local rows.
     unsigned nrow_local = this->nrow_local();
  
     // First row on this processor.
     unsigned first_row = this->first_row();
  
     // output
     // Store the precision so we can revert it.
     std::streamsize old_precision = 0;
     if (output_precision > 0)
     {
       old_precision = outfile.precision();
       outfile << std::setprecision(output_precision);
     }
  
     for (unsigned i = 0; i < nrow_local; i++)
     {
       outfile << (i + first_row) << " " << Values_pt[i] << std::endl;
     }
  
     // Revert the precision.
     if (output_precision > 0)
     {
       outfile << std::setprecision(old_precision);
     }
   }
  
   //============================================================================
   /// compute the dot product of this vector with the vector vec
   //============================================================================
   double DoubleVector::dot(const DoubleVector& vec) const
   {
 #ifdef PARANOID
     // paranoid check that the vector is setup
     if (!this->built())
     {
       std::ostringstream error_message;
       error_message << "This vector must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     if (!vec.built())
     {
       std::ostringstream error_message;
       error_message << "The input vector be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     if (*this->distribution_pt() != *vec.distribution_pt())
     {
       std::ostringstream error_message;
       error_message << "The distribution of this vector and the vector vec "
                     << "must be the same."
                     << "\n\n  this: " << *this->distribution_pt()
                     << "\n  vec:  " << *vec.distribution_pt();
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // compute the local norm
     unsigned nrow_local = this->nrow_local();
     double n = 0.0;
     const double* vec_values_pt = vec.values_pt();
     for (unsigned i = 0; i < nrow_local; i++)
     {
       n += Values_pt[i] * vec_values_pt[i];
     }
  
     // if this vector is distributed and on multiple processors then gather
 #ifdef OOMPH_HAS_MPI
     double n2 = n;
     if (this->distributed() &&
         this->distribution_pt()->communicator_pt()->nproc() > 1)
     {
       MPI_Allreduce(&n,
                     &n2,
                     1,
                     MPI_DOUBLE,
                     MPI_SUM,
                     this->distribution_pt()->communicator_pt()->mpi_comm());
     }
     n = n2;
 #endif
  
     // and return;
     return n;
   }
  
   //============================================================================
   /// compute the 2 norm of this vector
   //============================================================================
   double DoubleVector::norm() const
   {
 #ifdef PARANOID
     // paranoid check that the vector is setup
     if (!this->built())
     {
       std::ostringstream error_message;
       error_message << "This vector must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // compute the local norm
     unsigned nrow_local = this->nrow_local();
     double n = 0;
     for (unsigned i = 0; i < nrow_local; i++)
     {
       n += Values_pt[i] * Values_pt[i];
     }
  
     // if this vector is distributed and on multiple processors then gather
 #ifdef OOMPH_HAS_MPI
     double n2 = n;
     if (this->distributed() &&
         this->distribution_pt()->communicator_pt()->nproc() > 1)
     {
       MPI_Allreduce(&n,
                     &n2,
                     1,
                     MPI_DOUBLE,
                     MPI_SUM,
                     this->distribution_pt()->communicator_pt()->mpi_comm());
     }
     n = n2;
 #endif
  
     // sqrt the norm
     n = sqrt(n);
  
     // and return
     return n;
   }
  
   //============================================================================
   /// compute the A-norm using the matrix at matrix_pt
   //============================================================================
   double DoubleVector::norm(const CRDoubleMatrix* matrix_pt) const
   {
 #ifdef PARANOID
     // paranoid check that the vector is setup
     if (!this->built())
     {
       std::ostringstream error_message;
       error_message << "This vector must be setup.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     if (!matrix_pt->built())
     {
       std::ostringstream error_message;
       error_message << "The input matrix be built.";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
     if (*this->distribution_pt() != *matrix_pt->distribution_pt())
     {
       std::ostringstream error_message;
       error_message << "The distribution of this vector and the matrix at "
                     << "matrix_pt must be the same";
       throw OomphLibError(
         error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
     }
 #endif
  
     // compute the matrix norm
     DoubleVector x(this->distribution_pt(), 0.0);
     matrix_pt->multiply(*this, x);
     return sqrt(this->dot(x));
   }
  
   /// output operator
   std::ostream& operator<<(std::ostream& out, const DoubleVector& v)
   {
     // Do the first value outside the loop to get the ", "s right.
     out << "[" << v[0];
  
     for (unsigned i = 1, ni = v.nrow_local(); i < ni; i++)
     {
       out << ", " << v[i];
     }
     out << "]";
  
     return out;
   }
  
   //=================================================================
   /// Namespace for helper functions for DoubleVectors
   //=================================================================
   namespace DoubleVectorHelpers
   {
     //===========================================================================
     /// Concatenate DoubleVectors.
     /// Takes a Vector of DoubleVectors. If the out vector is built, we will not
     /// build a new distribution. Otherwise we build a uniform distribution.
     ///
     /// The rows of the out vector is seen "as it is" in the in vectors.
     /// For example, if we have DoubleVectors with distributions A and B,
     /// distributed across two processors (p0 and p1),
     ///
     /// A: [a0] (on p0)    B: [b0] (on p0)
     ///    [a1] (on p1)       [b1] (on P1),
     ///
     /// then the out_vector is
     ///
     /// [a0  (on p0)
     ///  a1] (on p0)
     /// [b0]  (on p1)
     ///  b1] (on p1),
     ///
     /// Communication is required between processors. The sum of the global
     /// number of rows in the in vectors must equal to the global number of rows
     /// in the out vector. This condition must be met if one is to supply an out
     /// vector with a distribution, otherwise we can let the function generate
     /// the out vector distribution itself.
     //===========================================================================
     void concatenate(const Vector<DoubleVector*>& in_vector_pt,
                      DoubleVector& out_vector)
     {
       // How many in vectors to concatenate?
       unsigned nvectors = in_vector_pt.size();
  
       // PARANIOD checks which involves the in vectors only
 #ifdef PARANOID
       // Check that there is at least one vector.
       if (nvectors == 0)
       {
         std::ostringstream error_message;
         error_message << "There is no vector to concatenate...\n"
                       << "Perhaps you forgot to fill in_vector_pt?\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Does this vector need concatenating?
       if (nvectors == 1)
       {
         std::ostringstream warning_message;
         warning_message << "There is only one vector to concatenate...\n"
                         << "This does not require concatenating...\n";
         OomphLibWarning(warning_message.str(),
                         OOMPH_CURRENT_FUNCTION,
                         OOMPH_EXCEPTION_LOCATION);
       }
  
       // Check that all the DoubleVectors in in_vector_pt are built
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         if (!in_vector_pt[vec_i]->built())
         {
           std::ostringstream error_message;
           error_message << "The vector in position " << vec_i
                         << " is not built.\n"
                         << "I cannot concatenate an unbuilt vector.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
 #endif
  
       // The communicator pointer for the first in vector.
       const OomphCommunicator* const comm_pt =
         in_vector_pt[0]->distribution_pt()->communicator_pt();
  
       // Check if the first in vector is distributed.
       bool distributed = in_vector_pt[0]->distributed();
  
       // If the out vector is not built, build it with a uniform distribution.
       if (!out_vector.built())
       {
         // Nrow for the out vector is the sum of the nrow of the in vectors.
         unsigned tmp_nrow = 0;
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           tmp_nrow += in_vector_pt[vec_i]->nrow();
         }
  
         // Build the out vector with uniform distribution.
         out_vector.build(
           LinearAlgebraDistribution(comm_pt, tmp_nrow, distributed), 0.0);
       }
       else
       {
 #ifdef PARANOID
         // Check that the sum of nrow of in vectors match the nrow in the out
         // vectors.
         unsigned in_nrow = 0;
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           in_nrow += in_vector_pt[vec_i]->nrow();
         }
  
         if (in_nrow != out_vector.nrow())
         {
           std::ostringstream error_message;
           error_message << "The sum of nrow of the in vectors does not match\n"
                         << "the nrow of the out vector.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
 #endif
       }
  
 #ifdef PARANOID
       // Check that all communicators of the vectors to concatenate are the same
       // by comparing all communicators against the out vector.
       const OomphCommunicator out_comm =
         *(out_vector.distribution_pt()->communicator_pt());
  
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         // Get the Communicator for the current vector.
         const OomphCommunicator in_comm =
           *(in_vector_pt[vec_i]->distribution_pt()->communicator_pt());
  
         if (out_comm != in_comm)
         {
           std::ostringstream error_message;
           error_message << "The vector in position " << vec_i << " has a\n"
                         << "different communicator from the out vector.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that the distributed boolean is the same for all vectors.
       if (out_comm.nproc() != 1)
       {
         const bool out_distributed = out_vector.distributed();
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           if (out_distributed != in_vector_pt[vec_i]->distributed())
           {
             std::ostringstream error_message;
             error_message << "The vector in position " << vec_i << " has a\n"
                           << "different distributed boolean from "
                           << "the out vector.\n";
             throw OomphLibError(error_message.str(),
                                 OOMPH_CURRENT_FUNCTION,
                                 OOMPH_EXCEPTION_LOCATION);
           }
         }
       }
 #endif
  
  
       // Now we do the concatenation.
       if ((comm_pt->nproc() == 1) || !distributed)
       {
         // Serial version of the code.
         // This is trivial, we simply loop through the in vectors and
         // fill in the out vector.
  
         // Out vector index.
         unsigned out_i = 0;
  
         // Out vector values.
         double* out_value_pt = out_vector.values_pt();
  
         // Loop through the in vectors.
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           // Nrow of current in vector.
           unsigned in_nrow = in_vector_pt[vec_i]->nrow();
  
           // In vector values.
           double* in_value_pt = in_vector_pt[vec_i]->values_pt();
  
           // Loop through the entries of this in vector.
           for (unsigned i = 0; i < in_nrow; i++)
           {
             out_value_pt[out_i++] = in_value_pt[i];
           }
         }
       }
       // Otherwise we are dealing with a distributed vector.
       else
       {
 #ifdef OOMPH_HAS_MPI
         // Get the number of processors
         unsigned nproc = comm_pt->nproc();
  
         // My rank
         unsigned my_rank = comm_pt->my_rank();
  
         // Storage for the data (per processor) to send
         Vector<Vector<double>> values_to_send(nproc);
  
         // The sum of the nrow for the in vectors (so far). This is used as an
         // offset to calculate the global equation number in the out vector
         unsigned long sum_of_vec_nrow = 0;
  
         // Loop over the in vectors and work out:
         // out_p: the rank the of receiving processor
         // out_local_eqn: the local equation number of the receiving processor
         //
         // Then put the value and out_local_eqn at out_p in values_to_send
  
         LinearAlgebraDistribution* out_distribution_pt =
           out_vector.distribution_pt();
         for (unsigned in_vec_i = 0; in_vec_i < nvectors; in_vec_i++)
         {
           // Loop through the local equations
           unsigned in_vec_nrow_local = in_vector_pt[in_vec_i]->nrow_local();
           unsigned in_vec_first_row = in_vector_pt[in_vec_i]->first_row();
  
           for (unsigned in_row_i = 0; in_row_i < in_vec_nrow_local; in_row_i++)
           {
             // Calculate the global equation number for this in_row_i
             unsigned out_global_eqn =
               in_row_i + in_vec_first_row + sum_of_vec_nrow;
  
             // Get the processor that this global row belongs to.
             // The rank_of_global_row(...) function loops through all the
             // processors and does two unsigned comparisons. Since we have to do
             // this for every row, it may be better to store a list mapping for
             // very large number of processors.
             unsigned out_p =
               out_distribution_pt->rank_of_global_row(out_global_eqn);
             //         unsigned out_p = out_distribution_pt
             //           ->rank_of_global_row_map(out_global_eqn);
  
             // Knowing out_p enables us to work out the out_first_row and
             // out_local_eqn.
             unsigned out_first_row = out_distribution_pt->first_row(out_p);
             unsigned out_local_eqn = out_global_eqn - out_first_row;
  
             // Now push back the out_local_eqn and the value
             values_to_send[out_p].push_back(out_local_eqn);
             values_to_send[out_p].push_back(
               (*in_vector_pt[in_vec_i])[in_row_i]);
           }
  
           // Update the offset.
           sum_of_vec_nrow += in_vector_pt[in_vec_i]->nrow();
         }
  
         // Prepare to send the data!
  
         // Storage for the number of data to be sent to each processor.
         Vector<int> send_n(nproc, 0);
  
         // Storage for all the values to be send to each processor.
         Vector<double> send_values_data;
  
         // Storage location within send_values_data
         Vector<int> send_displacement(nproc, 0);
  
         // Get the total amount of data which needs to be sent, so we can
         // reserve space for it.
         unsigned total_ndata = 0;
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           if (rank != my_rank)
           {
             total_ndata += values_to_send[rank].size();
           }
         }
  
         // Now we don't have to re-allocate data/memory when push_back is
         // called. Nb. Using push_back without reserving memory may cause
         // multiple re-allocation behind the scenes, this is expensive.
         send_values_data.reserve(total_ndata);
  
         // Loop over all the processors to "flat pack" the data for sending.
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           // Set the offset for the current processor
           send_displacement[rank] = send_values_data.size();
  
           // Don't bother to do anything if
           // the processor in the loop is the current processor.
           if (rank != my_rank)
           {
             // Put the values into the send data vector.
             unsigned n_data = values_to_send[rank].size();
             for (unsigned j = 0; j < n_data; j++)
             {
               send_values_data.push_back(values_to_send[rank][j]);
             } // Loop over the data
           } // if rank != my_rank
  
           // Find the number of data to be added to the vector.
           send_n[rank] = send_values_data.size() - send_displacement[rank];
         } // Loop over processors
  
         // Storage for the number of data to be received from each processor.
         Vector<int> receive_n(nproc, 0);
         MPI_Alltoall(&send_n[0],
                      1,
                      MPI_INT,
                      &receive_n[0],
                      1,
                      MPI_INT,
                      comm_pt->mpi_comm());
  
         // Prepare the data to be received
         // by working out the displacement from the received data.
         Vector<int> receive_displacement(nproc, 0);
         int receive_data_count = 0;
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           receive_displacement[rank] = receive_data_count;
           receive_data_count += receive_n[rank];
         }
  
         // Now resize the receive buffer for all data from all processors.
         // Make sure that it has size of at least one.
         if (receive_data_count == 0)
         {
           receive_data_count++;
         }
         Vector<double> receive_values_data(receive_data_count);
  
         // Make sure that the send buffer has size at least one
         // so that we don't get a segmentation fault.
         if (send_values_data.size() == 0)
         {
           send_values_data.resize(1);
         }
  
         // Now send the data between all processors
         MPI_Alltoallv(&send_values_data[0],
                       &send_n[0],
                       &send_displacement[0],
                       MPI_DOUBLE,
                       &receive_values_data[0],
                       &receive_n[0],
                       &receive_displacement[0],
                       MPI_DOUBLE,
                       comm_pt->mpi_comm());
  
         // Data from all other processors are stored in:
         // receive_values_data
         // Data already on this processor is stored in:
         // values_to_send[my_rank]
  
         // Loop through the data on this processor.
         unsigned location_i = 0;
         unsigned my_values_to_send_size = values_to_send[my_rank].size();
         while (location_i < my_values_to_send_size)
         {
           out_vector[unsigned(values_to_send[my_rank][location_i])] =
             values_to_send[my_rank][location_i + 1];
  
           location_i += 2;
         }
  
         // Before we loop through the data on other processors, we need to check
         // if any data has been received.
         bool data_has_been_received = false;
         unsigned send_rank = 0;
         while (send_rank < nproc)
         {
           if (receive_n[send_rank] > 0)
           {
             data_has_been_received = true;
             break;
           }
           send_rank++;
         }
  
         location_i = 0;
         if (data_has_been_received)
         {
           unsigned receive_values_data_size = receive_values_data.size();
           while (location_i < receive_values_data_size)
           {
             out_vector[unsigned(receive_values_data[location_i])] =
               receive_values_data[location_i + 1];
             location_i += 2;
           }
         }
 #else
         {
           std::ostringstream error_message;
           error_message << "I don't know what to do with distributed vectors\n"
                         << "without MPI... :(";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
 #endif
       }
     } // function concatenate
  
     //===========================================================================
     /// Wrapper around the other concatenate(...) function.
     /// Be careful with Vector of vectors. If the DoubleVectors are resized,
     /// there could be reallocation of memory. If we wanted to use the function
     /// which takes a Vector of pointers to DoubleVectors, we would either have
     /// to invoke new and remember to delete, or create a temporary Vector to
     /// store pointers to the DoubleVector objects.
     /// This wrapper is meant to make life easier for the user by avoiding calls
     /// to new/delete AND without creating a temporary vector of pointers to
     /// DoubleVectors.
     /// If we had C++ 11, this would be so much nicer since we can use smart
     /// pointers which will delete themselves, so we do not have to remember
     /// to delete!
     //===========================================================================
     void concatenate(Vector<DoubleVector>& in_vector, DoubleVector& out_vector)
     {
       const unsigned n_in_vector = in_vector.size();
  
       Vector<DoubleVector*> in_vector_pt(n_in_vector, 0);
  
       for (unsigned i = 0; i < n_in_vector; i++)
       {
         in_vector_pt[i] = &in_vector[i];
       }
  
       DoubleVectorHelpers::concatenate(in_vector_pt, out_vector);
     } // function concatenate
  
     //===========================================================================
     /// Split a DoubleVector into the out DoubleVectors.
     /// Let vec_A be the in Vector, and let vec_B and vec_C be the out vectors.
     /// Then the splitting of vec_A is depicted below:
     /// vec_A: [a0  (on p0)
     ///         a1] (on p0)
     ///        [a2  (on p1)
     ///         a3] (on p1)
     ///
     /// vec_B: [a0] (on p0)    vec_C: [a2] (on p0)
     ///        [a1] (on p1)           [a3] (on p1)
     ///
     /// Communication is required between processors.
     /// The out_vector_pt must contain pointers to DoubleVector which has
     /// already been built with the correct distribution; the sum of the number
     /// of global row of the out vectors must be the same the number of global
     /// rows of the in vector.
     //===========================================================================
     void split(const DoubleVector& in_vector,
                Vector<DoubleVector*>& out_vector_pt)
     {
       // How many out vectors do we have?
       unsigned nvec = out_vector_pt.size();
 #ifdef PARANOID
  
       // Check that the in vector is built.
       if (!in_vector.built())
       {
         std::ostringstream error_message;
         error_message << "The in_vector is not built.\n"
                       << "Please build it!.\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Check that all the out vectors are built.
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         if (!out_vector_pt[vec_i]->built())
         {
           std::ostringstream error_message;
           error_message << "The vector at position " << vec_i
                         << "  is not built.\n"
                         << "Please build it!.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that the sum of the nrow from out vectors is the same as the
       // nrow from in_vector.
       unsigned out_nrow_sum = 0;
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         out_nrow_sum += out_vector_pt[vec_i]->nrow();
       }
  
       if (in_vector.nrow() != out_nrow_sum)
       {
         std::ostringstream error_message;
         error_message << "The global number of rows in the in_vector\n"
                       << "is not equal to the sum of the global nrows\n"
                       << "of the in vectors.\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Check that all communicators are the same. We use a communicator to
       // get the number of processors and my_rank. So we would like them to be
       // the same for in_vector and all out vectors.
       const OomphCommunicator in_vector_comm =
         *(in_vector.distribution_pt()->communicator_pt());
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         const OomphCommunicator dist_i_comm =
           *(out_vector_pt[vec_i]->distribution_pt()->communicator_pt());
  
         if (in_vector_comm != dist_i_comm)
         {
           std::ostringstream error_message;
           error_message << "The communicator for the distribution in the \n"
                         << "position " << vec_i
                         << " is not the same as the in_vector\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that the distributed boolean is the same for all vectors.
       bool para_distributed = in_vector.distributed();
  
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         if (para_distributed != out_vector_pt[vec_i]->distributed())
         {
           std::ostringstream error_message;
           error_message
             << "The vector in position " << vec_i << " does not \n"
             << " have the same distributed boolean as the in_vector\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
 #endif
  
       // The communicator.
       const OomphCommunicator* const comm_pt =
         in_vector.distribution_pt()->communicator_pt();
  
       // Is this distributed?
       bool distributed = in_vector.distributed();
  
       // The serial code.
       if ((comm_pt->nproc() == 1) || !distributed)
       {
         // Serial version of the code: loop through all the out vectors and
         // insert the elements of in_vector.
  
         // index for in vector, and in vector values.
         unsigned in_vec_i = 0;
         double* in_value_pt = in_vector.values_pt();
  
         // Fill in the out vectors.
         for (unsigned out_vec_i = 0; out_vec_i < nvec; out_vec_i++)
         {
           // out vector nrow and values.
           unsigned out_nrow = out_vector_pt[out_vec_i]->nrow();
           double* out_value_pt = out_vector_pt[out_vec_i]->values_pt();
  
           // Fill in the current out vector.
           for (unsigned out_val_i = 0; out_val_i < out_nrow; out_val_i++)
           {
             out_value_pt[out_val_i] = in_value_pt[in_vec_i++];
           }
         }
       }
       // Otherwise we are dealing with a distributed vector.
       else
       {
 #ifdef OOMPH_HAS_MPI
         // For each entry in the in_vector, we need to work out:
         // 1) Which out vector this entry belongs to,
         // 2) which processor to send the data to and
         // 3) the local equation number in the out vector.
         //
         // We know the in_local_eqn, we can work out the in_global_eqn.
         //
         // From in_global_eqn we can work out the out vector and
         // the out_global_eqn.
         //
         // The out_global_eqn allows us to determine which processor to send to.
         // With the out_p (processor to send data to) and out vector, we get the
         // out_first_row which then allows us to work out the out_local_eqn.
  
  
         // Get the number of processors
         unsigned nproc = comm_pt->nproc();
  
         // My rank
         unsigned my_rank = comm_pt->my_rank();
  
         // Storage for the data (per processor) to send.
         Vector<Vector<double>> values_to_send(nproc);
  
         // Sum of the nrow of the out vectors so far. This is used to work out
         // which out_vector a in_global_eqn belongs to.
         Vector<unsigned> sum_of_out_nrow(nvec + 1);
         for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
         {
           sum_of_out_nrow[vec_i + 1] =
             sum_of_out_nrow[vec_i] + out_vector_pt[vec_i]->nrow();
         }
  
         // Loop through the in_vector local values.
         unsigned in_nrow_local = in_vector.nrow_local();
         for (unsigned in_local_eqn = 0; in_local_eqn < in_nrow_local;
              in_local_eqn++)
         {
           // The global equation number of this row.
           unsigned in_global_eqn = in_local_eqn + in_vector.first_row();
  
           // Which out_vector does this in_global_eqn belong to?
           unsigned out_vector_i = 0;
           while (in_global_eqn < sum_of_out_nrow[out_vector_i] ||
                  in_global_eqn >= sum_of_out_nrow[out_vector_i + 1])
           {
             out_vector_i++;
           }
  
           // The out_global_eqn
           // (this is the global equation in the current out vector)
           unsigned out_global_eqn =
             in_global_eqn - sum_of_out_nrow[out_vector_i];
  
           // The processor to send this row to.
           unsigned out_p =
             out_vector_pt[out_vector_i]->distribution_pt()->rank_of_global_row(
               out_global_eqn);
  
           // The local_eqn in the out_vector_i
           unsigned out_local_eqn =
             out_global_eqn -
             out_vector_pt[out_vector_i]->distribution_pt()->first_row(out_p);
  
  
           // Fill in the data to send
  
           // Which out vector to put this data in.
           values_to_send[out_p].push_back(out_vector_i);
  
           // The local equation of the data.
           values_to_send[out_p].push_back(out_local_eqn);
  
           // The actual data.
           values_to_send[out_p].push_back(in_vector[in_local_eqn]);
         }
  
         // Prepare to send the data!
  
         // Storage for the number of data to be sent to each processor.
         Vector<int> send_n(nproc, 0);
  
         // Storage for all the values to be send to each processor.
         Vector<double> send_values_data;
  
         // Storage location within send_values_data
         Vector<int> send_displacement(nproc, 0);
  
         // Get the total amount of data which needs to be sent, so we can
         // reserve space for it.
         unsigned total_ndata = 0;
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           if (rank != my_rank)
           {
             total_ndata += values_to_send[rank].size();
           }
         }
  
         // Now we don't have to re-allocate data/memory when push_back is
         // called. Nb. Using push_back without reserving memory may cause
         // multiple re-allocation behind the scenes, this is expensive.
         send_values_data.reserve(total_ndata);
  
         // Loop over all the processors to "flat pack" the data for sending.
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           // Set the offset for the current processor
           send_displacement[rank] = send_values_data.size();
  
           // Don't bother to do anything if
           // the processor in the loop is the current processor.
           if (rank != my_rank)
           {
             // Put the values into the send data vector.
             unsigned n_data = values_to_send[rank].size();
             for (unsigned j = 0; j < n_data; j++)
             {
               send_values_data.push_back(values_to_send[rank][j]);
             } // Loop over the data
           } // if rank != my_rank
  
           // Find the number of data to be added to the vector.
           send_n[rank] = send_values_data.size() - send_displacement[rank];
         } // Loop over processors
  
         // Storage for the number of data to be received from each processor.
         Vector<int> receive_n(nproc, 0);
         MPI_Alltoall(&send_n[0],
                      1,
                      MPI_INT,
                      &receive_n[0],
                      1,
                      MPI_INT,
                      comm_pt->mpi_comm());
  
         // Prepare the data to be received
         // by working out the displacement from the received data.
         Vector<int> receive_displacement(nproc, 0);
         int receive_data_count = 0;
         for (unsigned rank = 0; rank < nproc; rank++)
         {
           receive_displacement[rank] = receive_data_count;
           receive_data_count += receive_n[rank];
         }
  
         // Now resize the receive buffer for all data from all processors.
         // Make sure that it has size of at least one.
         if (receive_data_count == 0)
         {
           receive_data_count++;
         }
         Vector<double> receive_values_data(receive_data_count);
  
         // Make sure that the send buffer has size at least one
         // so that we don't get a segmentation fault.
         if (send_values_data.size() == 0)
         {
           send_values_data.resize(1);
         }
  
         // Now send the data between all processors
         MPI_Alltoallv(&send_values_data[0],
                       &send_n[0],
                       &send_displacement[0],
                       MPI_DOUBLE,
                       &receive_values_data[0],
                       &receive_n[0],
                       &receive_displacement[0],
                       MPI_DOUBLE,
                       comm_pt->mpi_comm());
  
         // Data from all other processors are stored in:
         // receive_values_data
         // Data already on this processor is stored in:
         // values_to_send[my_rank]
         //
  
         // Index for values_to_send Vector.
         unsigned location_i = 0;
         // Loop through the data on this processor
         unsigned my_values_to_send_size = values_to_send[my_rank].size();
         while (location_i < my_values_to_send_size)
         {
           // The vector to put the values in.
           unsigned out_vector_i =
             unsigned(values_to_send[my_rank][location_i++]);
  
           // Where to put the value.
           unsigned out_local_eqn =
             unsigned(values_to_send[my_rank][location_i++]);
  
           // The actual value!
           double out_value = values_to_send[my_rank][location_i++];
  
           // Insert the value in the out vector.
           (*out_vector_pt[out_vector_i])[out_local_eqn] = out_value;
         }
  
         // Before we loop through the data on other processors, we need to check
         // if any data has been received. This is because the
         // receive_values_data has been resized to at least one, even if no data
         // is sent.
         bool data_has_been_received = false;
         unsigned send_rank = 0;
         while (send_rank < nproc)
         {
           if (receive_n[send_rank] > 0)
           {
             data_has_been_received = true;
             break;
           }
           send_rank++;
         }
  
         // Reset the index, it is now being used to index the
         // receive_values_data vector.
         location_i = 0;
         if (data_has_been_received)
         {
           // Extract the data and put it into the out vector.
           unsigned receive_values_data_size = receive_values_data.size();
           while (location_i < receive_values_data_size)
           {
             // Which out vector to put the value in?
             unsigned out_vector_i = unsigned(receive_values_data[location_i++]);
  
             // Where in the out vector to put the value?
             unsigned out_local_eqn =
               unsigned(receive_values_data[location_i++]);
  
             // The value to put in.
             double out_value = receive_values_data[location_i++];
  
             // Insert the value in the out vector.
             (*out_vector_pt[out_vector_i])[out_local_eqn] = out_value;
           }
         }
 #else
         {
           std::ostringstream error_message;
           error_message << "You have a distributed vector but with no mpi...\n"
                         << "I don't know what to do :( \n";
           throw OomphLibError(
             error_message.str(), "RYARAYERR", OOMPH_EXCEPTION_LOCATION);
         }
 #endif
       }
     } // function split(...)
  
     //===========================================================================
     /// Wrapper around the other split(...) function.
     /// Be careful with Vector of vectors. If the DoubleVectors are resized,
     /// there could be reallocation of memory. If we wanted to use the function
     /// which takes a Vector of pointers to DoubleVectors, we would either have
     /// to invoke new and remember to delete, or create a temporary Vector to
     /// store pointers to the DoubleVector objects.
     /// This wrapper is meant to make life easier for the user by avoiding calls
     /// to new/delete AND without creating a temporary vector of pointers to
     /// DoubleVectors.
     /// If we had C++ 11, this would be so much nicer since we can use smart
     /// pointers which will delete themselves, so we do not have to remember
     /// to delete!
     //===========================================================================
     void split(const DoubleVector& in_vector, Vector<DoubleVector>& out_vector)
     {
       const unsigned n_out_vector = out_vector.size();
       Vector<DoubleVector*> out_vector_pt(n_out_vector, 0);
  
       for (unsigned i = 0; i < n_out_vector; i++)
       {
         out_vector_pt[i] = &out_vector[i];
       }
  
       DoubleVectorHelpers::split(in_vector, out_vector_pt);
     } // function split(...)
  
     //===========================================================================
     /// Concatenate DoubleVectors.
     /// Takes a Vector of DoubleVectors. If the out vector is built, we will not
     /// build a new distribution. Otherwise a new distribution will be built
     /// using LinearAlgebraDistribution::concatenate(...).
     ///
     /// The out vector has its rows permuted according to the individual
     /// distributions of the in vectors. For example, if we have DoubleVectors
     /// with distributions A and B, distributed across two processors
     /// (p0 and p1),
     ///
     /// A: [a0] (on p0)    B: [b0] (on p0)
     ///    [a1] (on p1)       [b1] (on p1),
     ///
     /// then the out_vector is
     ///
     /// [a0  (on p0)
     ///  b0] (on p0)
     /// [a1  (on p1)
     ///  b1] (on p1),
     ///
     /// as opposed to
     ///
     /// [a0  (on p0)
     ///  a1] (on p0)
     /// [b0  (on p1)
     ///  b1] (on p1).
     ///
     /// Note (1): The out vector may not be uniformly distributed even
     /// if the in vectors have uniform distributions. The nrow_local of the
     /// out vector will be the sum of the nrow_local of the in vectors.
     /// Try this out with two distributions of global rows 3 and 5, uniformly
     /// distributed across two processors. Compare this against a distribution
     /// of global row 8 distributed across two processors.
     ///
     /// No MPI sends or receives are performed: the data stays on the
     /// processor as defined by the distributions from the in vectors.
     //===========================================================================
     void concatenate_without_communication(
       const Vector<DoubleVector*>& in_vector_pt, DoubleVector& out_vector)
     {
       // How many in vectors do we want to concatenate?
       unsigned nvectors = in_vector_pt.size();
  
       // PARANOID checks which involves the in vectors only.
 #ifdef PARANOID
       // Check that there is at least one vector.
       if (nvectors == 0)
       {
         std::ostringstream error_message;
         error_message << "There is no vector to concatenate...\n"
                       << "Perhaps you forgot to fill in_vector_pt?\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Does this vector need concatenating?
       if (nvectors == 1)
       {
         std::ostringstream warning_message;
         warning_message << "There is only one vector to concatenate...\n"
                         << "This does not require concatenating...\n";
         OomphLibWarning(warning_message.str(),
                         OOMPH_CURRENT_FUNCTION,
                         OOMPH_EXCEPTION_LOCATION);
       }
  
       // Check that all the DoubleVectors in in_vector_pt are built
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         if (!in_vector_pt[vec_i]->built())
         {
           std::ostringstream error_message;
           error_message << "The vector in position " << vec_i
                         << " is not built.\n"
                         << "I cannot concatenate an unbuilt vector.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
 #endif
  
       // If the out vector is not built, build it with the correct distribution.
       if (!out_vector.built())
       {
         Vector<LinearAlgebraDistribution*> in_distribution_pt(nvectors, 0);
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           in_distribution_pt[vec_i] = in_vector_pt[vec_i]->distribution_pt();
         }
  
         LinearAlgebraDistribution tmp_distribution;
         LinearAlgebraDistributionHelpers::concatenate(in_distribution_pt,
                                                       tmp_distribution);
         out_vector.build(tmp_distribution, 0.0);
       }
  
       // PARANOID checks which involves all in vectors and out vectors.
 #ifdef PARANOID
  
       // Check that all communicators of the vectors to concatenate are the same
       // by comparing all communicators against the out vector.
       const OomphCommunicator out_comm =
         *(out_vector.distribution_pt()->communicator_pt());
  
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         // Get the Communicator for the current vector.
         const OomphCommunicator in_comm =
           *(in_vector_pt[vec_i]->distribution_pt()->communicator_pt());
  
         if (out_comm != in_comm)
         {
           std::ostringstream error_message;
           error_message << "The vector in position " << vec_i << " has a\n"
                         << "different communicator from the out vector.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that the distributed boolean is the same for all vectors.
       if (out_comm.nproc() > 1)
       {
         const bool out_distributed = out_vector.distributed();
         for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
         {
           if (out_distributed != in_vector_pt[vec_i]->distributed())
           {
             std::ostringstream error_message;
             error_message << "The vector in position " << vec_i << " has a\n"
                           << "different distributed boolean from the "
                           << "out vector.\n";
             throw OomphLibError(error_message.str(),
                                 OOMPH_CURRENT_FUNCTION,
                                 OOMPH_EXCEPTION_LOCATION);
           }
         }
       }
  
       // Check that the distribution from the out vector is indeed the
       // same as the one created by
       // LinearAlgebraDistributionHelpers::concatenate(...). This test is
       // redundant if the out_vector is not built to begin with.
  
       // Create tmp_distribution, a concatenation of all distributions from
       // the in vectors.
       Vector<LinearAlgebraDistribution*> in_distribution_pt(nvectors, 0);
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         in_distribution_pt[vec_i] = in_vector_pt[vec_i]->distribution_pt();
       }
  
       LinearAlgebraDistribution tmp_distribution;
       LinearAlgebraDistributionHelpers::concatenate(in_distribution_pt,
                                                     tmp_distribution);
       // The the distribution from the out vector.
       LinearAlgebraDistribution out_distribution =
         *(out_vector.distribution_pt());
  
       // Compare them!
       if (tmp_distribution != out_distribution)
       {
         std::ostringstream error_message;
         error_message << "The distribution of the out vector is not correct.\n"
                       << "Please call the function with a cleared out vector,\n"
                       << "or compare the distribution of the out vector with\n"
                       << "the distribution created by\n"
                       << "LinearAlgebraDistributionHelpers::concatenate(...)\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Do not need these distributions.
       tmp_distribution.clear();
       out_distribution.clear();
 #endif
  
  
       unsigned out_value_offset = 0;
  
       double* out_value_pt = out_vector.values_pt();
  
       // Loop through the vectors.
       for (unsigned vec_i = 0; vec_i < nvectors; vec_i++)
       {
         // Get the nrow_local and
         // pointer to the values for the current in vector.
         unsigned in_vector_nrow_local = in_vector_pt[vec_i]->nrow_local();
         double* in_vector_value_pt = in_vector_pt[vec_i]->values_pt();
  
         // Loop through the local values and inset them into the out_vector.
         for (unsigned val_i = 0; val_i < in_vector_nrow_local; val_i++)
         {
           out_value_pt[out_value_offset + val_i] = in_vector_value_pt[val_i];
         }
  
         // Update the offset.
         out_value_offset += in_vector_nrow_local;
       }
     } // function concatenate_without_communication
  
     //===========================================================================
     /// Wrapper around the other concatenate_without_communication(...)
     /// function.
     /// Be careful with Vector of vectors. If the DoubleVectors are resized,
     /// there could be reallocation of memory. If we wanted to use the function
     /// which takes a Vector of pointers to DoubleVectors, we would either have
     /// to invoke new and remember to delete, or create a temporary Vector to
     /// store pointers to the DoubleVector objects.
     /// This wrapper is meant to make life easier for the user by avoiding calls
     /// to new/delete AND without creating a temporary vector of pointers to
     /// DoubleVectors.
     /// If we had C++ 11, this would be so much nicer since we can use smart
     /// pointers which will delete themselves, so we do not have to remember
     /// to delete!
     //===========================================================================
     void concatenate_without_communication(Vector<DoubleVector>& in_vector,
                                            DoubleVector& out_vector)
     {
       const unsigned n_in_vector = in_vector.size();
  
       Vector<DoubleVector*> in_vector_pt(n_in_vector, 0);
  
       for (unsigned i = 0; i < n_in_vector; i++)
       {
         in_vector_pt[i] = &in_vector[i];
       }
  
       DoubleVectorHelpers::concatenate_without_communication(in_vector_pt,
                                                              out_vector);
     } // function concatenate_without_communication
  
     //===========================================================================
     /// Split a DoubleVector into the out DoubleVectors.
     /// Data stays on its current processor, no data is sent between processors.
     /// This results in out vectors which are a permutation of the in vector.
     ///
     /// Let vec_A be the in Vector, and let vec_B and vec_C be the out vectors.
     /// Then the splitting of vec_A is depicted below:
     /// vec_A: [a0  (on p0)
     ///         a1] (on p0)
     ///        [a2  (on p1)
     ///         a3] (on p1)
     ///
     /// vec_B: [a0] (on p0)    vec_C: [a1] (on p0)
     ///        [a2] (on p1)           [a3] (on p1).
     ///
     /// This means that the distribution of the in vector MUST be a
     /// concatenation of the out vector distributions, refer to
     /// LinearAlgebraDistributionHelpers::concatenate(...) to concatenate
     /// distributions.
     //===========================================================================
     void split_without_communication(const DoubleVector& in_vector,
                                      Vector<DoubleVector*>& out_vector_pt)
     {
       // How many out vectors do we need?
       unsigned nvec = out_vector_pt.size();
  
 #ifdef PARANOID
       // Check that in_vector is built
       if (!in_vector.built())
       {
         std::ostringstream error_message;
         error_message << "The in_vector is not built.\n"
                       << "Please build it!.\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Check that all out vectors are built.
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         if (!out_vector_pt[vec_i]->built())
         {
           std::ostringstream error_message;
           error_message << "The vector at position " << vec_i
                         << " is not built.\n"
                         << "Please build it!.\n";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that the concatenation of distributions from the out vectors is
       // the same as the distribution from in_vector.
  
       // Create the distribution from out_distribution.
       Vector<LinearAlgebraDistribution*> tmp_out_distribution_pt(nvec, 0);
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         tmp_out_distribution_pt[vec_i] =
           out_vector_pt[vec_i]->distribution_pt();
       }
  
       LinearAlgebraDistribution tmp_distribution;
       LinearAlgebraDistributionHelpers::concatenate(tmp_out_distribution_pt,
                                                     tmp_distribution);
       // Compare the distributions
       if (tmp_distribution != *(in_vector.distribution_pt()))
       {
         std::ostringstream error_message;
         error_message << "The distribution from the in vector is incorrect.\n"
                       << "It must be a concatenation of all the distributions\n"
                       << "from the out vectors.\n";
         throw OomphLibError(error_message.str(),
                             OOMPH_CURRENT_FUNCTION,
                             OOMPH_EXCEPTION_LOCATION);
       }
  
       // Clear the distribution.
       tmp_distribution.clear();
  
       // Check that all communicators are the same. We use a communicator to
       // get the number of processors and my_rank. So we would like them to be
       // the same for the in vector and all the out vectors.
       const OomphCommunicator in_vector_comm =
         *(in_vector.distribution_pt()->communicator_pt());
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         const OomphCommunicator vec_i_comm =
           *(out_vector_pt[vec_i]->distribution_pt()->communicator_pt());
  
         if (in_vector_comm != vec_i_comm)
         {
           std::ostringstream error_message;
           error_message << "The communicator for the vector in position\n"
                         << vec_i << " is not the same as the in_vector\n"
                         << "communicator.";
           throw OomphLibError(error_message.str(),
                               OOMPH_CURRENT_FUNCTION,
                               OOMPH_EXCEPTION_LOCATION);
         }
       }
  
       // Check that if the in vector is distributed, then all the out vectors
       // are also distributed.
       if (in_vector_comm.nproc() > 1)
       {
         bool in_distributed = in_vector.distributed();
         for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
         {
           if (in_distributed != out_vector_pt[vec_i]->distributed())
           {
             std::ostringstream error_message;
             error_message << "The vector in position " << vec_i
                           << " does not have\n"
                           << "the same distributed boolean as the in vector";
             throw OomphLibError(error_message.str(),
                                 OOMPH_CURRENT_FUNCTION,
                                 OOMPH_EXCEPTION_LOCATION);
           }
         }
       }
 #endif
  
       // Loop through the sub vectors and insert the values from the
       // in vector.
       double* in_value_pt = in_vector.values_pt();
       unsigned in_value_offset = 0;
       for (unsigned vec_i = 0; vec_i < nvec; vec_i++)
       {
         // The nrow_local and values for the current out vector.
         unsigned out_nrow_local = out_vector_pt[vec_i]->nrow_local();
         double* out_value_pt = out_vector_pt[vec_i]->values_pt();
  
         // Loop through the local values of out vector.
         for (unsigned val_i = 0; val_i < out_nrow_local; val_i++)
         {
           out_value_pt[val_i] = in_value_pt[in_value_offset + val_i];
         }
  
         // Update the offset.
         in_value_offset += out_nrow_local;
       }
     } // function split_distribution_vector
  
     //===========================================================================
     /// Wrapper around the other split_without_communication(...)
     /// function.
     /// Be careful with Vector of vectors. If the DoubleVectors are resized,
     /// there could be reallocation of memory. If we wanted to use the function
     /// which takes a Vector of pointers to DoubleVectors, we would either have
     /// to invoke new and remember to delete, or create a temporary Vector to
     /// store pointers to the DoubleVector objects.
     /// This wrapper is meant to make life easier for the user by avoiding calls
     /// to new/delete AND without creating a temporary vector of pointers to
     /// DoubleVectors.
     /// If we had C++ 11, this would be so much nicer since we can use smart
     /// pointers which will delete themselves, so we do not have to remember
     /// to delete!
     //===========================================================================
     void split_without_communication(const DoubleVector& in_vector,
                                      Vector<DoubleVector>& out_vector)
     {
       const unsigned n_out_vector = out_vector.size();
  
       Vector<DoubleVector*> out_vector_pt(n_out_vector, 0);
  
       for (unsigned i = 0; i < n_out_vector; i++)
       {
         out_vector_pt[i] = &out_vector[i];
       }
  
       DoubleVectorHelpers::split_without_communication(in_vector,
                                                        out_vector_pt);
  
     } // function split_distribution_vector
  
   } // namespace DoubleVectorHelpers
  
 } // namespace oomph