preconditioner_array.cc
1 // LIC// ====================================================================
2 // LIC// This file forms part of oomph-lib, the object-oriented,
3 // LIC// multi-physics finite-element library, available
4 // LIC// at http://www.oomph-lib.org.
5 // LIC//
6 // LIC// Copyright (C) 2006-2023 Matthias Heil and Andrew Hazel
7 // LIC//
8 // LIC// This library is free software; you can redistribute it and/or
9 // LIC// modify it under the terms of the GNU Lesser General Public
10 // LIC// License as published by the Free Software Foundation; either
11 // LIC// version 2.1 of the License, or (at your option) any later version.
12 // LIC//
13 // LIC// This library is distributed in the hope that it will be useful,
14 // LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // LIC// Lesser General Public License for more details.
17 // LIC//
18 // LIC// You should have received a copy of the GNU Lesser General Public
19 // LIC// License along with this library; if not, write to the Free Software
20 // LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 // LIC// 02110-1301 USA.
22 // LIC//
23 // LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
24 // LIC//
25 // LIC//====================================================================
26 
27 // Config header generated by autoconfig
28 #ifdef HAVE_CONFIG_H
29 #include <oomph-lib-config.h>
30 #endif
31 
32 // Preconditioner array is only useful if we have mpi, otherwise a dummy
33 // implementation is used and this file doesn't need to implement anything
34 // (see the header file).
35 #ifdef OOMPH_HAS_MPI
36 
37 // oomph-lib includes
38 #include "preconditioner_array.h"
39 
40 namespace oomph
41 {
42  //============================================================================
43  /// Setup the preconditioners. Sets up each preconditioner in the
44  /// array for the corresponding matrix in the vector matrix_pt.
45  /// The number of preconditioners in the array is taken to be the length of
46  /// prec_pt.
47  //============================================================================
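 // Illustrative usage sketch (added for exposition; not part of the original
 // source file). The names problem_pt and the two block matrices are
 // hypothetical placeholders:
 //
 //   Vector<CRDoubleMatrix*> matrix_pt(2);  // one matrix per diagonal block
 //   Vector<Preconditioner*> prec_pt(2);    // one preconditioner per block
 //   // ... fill matrix_pt and prec_pt ...
 //   PreconditionerArray prec_array;
 //   prec_array.setup_preconditioners(matrix_pt, prec_pt,
 //                                    problem_pt->communicator_pt());
 //
 // Each processor subsequently applies "its" preconditioner via
 // solve_preconditioners(...) (see the header file).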
 48  void PreconditionerArray::setup_preconditioners(
 49  Vector<CRDoubleMatrix*> matrix_pt,
50  Vector<Preconditioner*> prec_pt,
51  const OomphCommunicator* comm_pt)
52  {
53  // clean memory
54  this->clean_up_memory();
55 
56  // get the number of preconditioners in the array
57  Nprec = prec_pt.size();
58 
59 #ifdef PARANOID
60  // check that the preconditioners have been set
61  if (Nprec < 2)
62  {
63  std::ostringstream error_message;
64  error_message << "The PreconditionerArray requires at least 2 "
65  << "preconditioners";
66  throw OomphLibError(
67  error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
68  }
69  // first check that the vector matrix_pt is the correct length
70  if (matrix_pt.size() != Nprec)
71  {
72  std::ostringstream error_message;
73  error_message << "The same number of preconditioners and matrices must "
74  << "be passed to the setup_preconditioners(...).";
75  throw OomphLibError(
76  error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
77  }
78 
79  // Resize the storage of the PARANOID check distributions
80  // Already cleared by clean_up_memory call at top of function
81  Distribution_pt.resize(Nprec);
82 #endif
83 
 84  // for each matrix: PARANOID checks and store a copy of the global communicator
85  for (unsigned i = 0; i < Nprec; i++)
86  {
87 #ifdef PARANOID
88  // paranoid check that each matrix is a CRDoubleMatrix and that
89  // it is built
90  if (matrix_pt[i] == 0)
91  {
92  std::ostringstream error_message;
93  error_message << "matrix_pt[" << i << "] = NULL.";
94  throw OomphLibError(error_message.str(),
95  OOMPH_CURRENT_FUNCTION,
96  OOMPH_EXCEPTION_LOCATION);
97  }
98 
99  // check the matrix has been built
100  if (!matrix_pt[i]->built())
101  {
102  std::ostringstream error_message;
103  error_message << "Matrix " << i << " has not been built.";
104  throw OomphLibError(error_message.str(),
105  OOMPH_CURRENT_FUNCTION,
106  OOMPH_EXCEPTION_LOCATION);
107  }
108 #endif
109 
110  // check that all the matrices have the same communicator
111  // and store a copy of the communicator
112  if (i == 0)
113  {
114  Global_communicator_pt = new OomphCommunicator(
115  matrix_pt[i]->distribution_pt()->communicator_pt());
116  }
117 
118 #ifdef PARANOID
119  else
120  {
121  if (*Global_communicator_pt !=
122  *matrix_pt[i]->distribution_pt()->communicator_pt())
123  {
124  std::ostringstream error_message;
125  error_message << "All matrices must have the same communicator.";
126  throw OomphLibError(error_message.str(),
127  OOMPH_CURRENT_FUNCTION,
128  OOMPH_EXCEPTION_LOCATION);
129  }
130  }
131 
132  // store a copy of the Distribution of each preconditioner for future
133  // PARANOID checks
134  Distribution_pt[i] =
135  new LinearAlgebraDistribution(matrix_pt[i]->distribution_pt());
136 #endif
137  }
138 
139  // number of processors
140  unsigned nproc = Global_communicator_pt->nproc();
141 
 142  // next compute the distribution of the preconditioners over the processors
 143  // such that each preconditioner has (as near as possible) an equal number
 144  // of processors
145  First_proc_for_prec.resize(Nprec);
146  Nproc_for_prec.resize(Nprec);
147 
 148  // compute the first processor assigned to each preconditioner
149  for (unsigned p = 0; p < Nprec; p++)
150  {
151  First_proc_for_prec[p] = unsigned(double(p * nproc) / double(Nprec));
152  }
153 
 154  // compute the number of processors assigned to each preconditioner
155  for (unsigned p = 0; p < Nprec - 1; p++)
156  {
 157  Nproc_for_prec[p] = First_proc_for_prec[p + 1] - First_proc_for_prec[p];
 158  }
159  Nproc_for_prec[Nprec - 1] = nproc - First_proc_for_prec[Nprec - 1];
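 // Worked example (added for exposition; not part of the original source):
 // with nproc = 7 processors and Nprec = 3 preconditioners the formula above
 // gives First_proc_for_prec = {0, 2, 4} and Nproc_for_prec = {2, 2, 3},
 // i.e. preconditioner 0 lives on processors 0-1, preconditioner 1 on
 // processors 2-3 and preconditioner 2 on processors 4-6.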
160 
161 #ifdef PARANOID
 162  // paranoid check that every preconditioner has at least one processor
163  for (unsigned p = 0; p < Nprec; p++)
164  {
165  if (Nproc_for_prec[p] == 0)
166  {
167  std::ostringstream error_message;
168  error_message << "We only have " << nproc << " processor[s]!\n"
169  << "This is not enough to perform the " << Nprec
170  << " block solves in parallel! Sorry! \n"
171  << "Please run this with more processors or disable the\n"
 172  << "request for two-level parallelism.\n";
173  throw OomphLibError(error_message.str(),
174  OOMPH_CURRENT_FUNCTION,
175  OOMPH_EXCEPTION_LOCATION);
176  }
177  }
178 #endif
179 
180  // compute the color of this processor
181  Color = 0;
182  unsigned my_rank = Global_communicator_pt->my_rank();
183  while (!(First_proc_for_prec[Color] <= my_rank &&
 184  my_rank < First_proc_for_prec[Color] + Nproc_for_prec[Color]))
 185  {
186  Color++;
187  }
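 // (Continuing the illustrative example above: processor 5 satisfies
 // First_proc_for_prec[2] = 4 <= 5 < 4 + 3 = 7, so it is assigned Color = 2
 // and will help to apply preconditioner 2. Note added for exposition.)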
188 
 189  // create the local communicator for this preconditioner's processors
 190  Local_communicator_pt = Global_communicator_pt->split(Color, my_rank);
 191 
192  // pointer for the local matrix on this processor
193  CRDoubleMatrix* local_matrix_pt = 0;
194 
195  // resize storage for details of the data to be sent and received
196  First_row_for_proc.resize(Nprec);
197  Nrow_local_for_proc.resize(Nprec);
198  First_row_from_proc.resize(Nprec);
199  Nrow_local_from_proc.resize(Nprec);
200 
201  // Vector of MPI_Requests - used for distributed matrices
202  Vector<MPI_Request> req;
203 
204  // Counter for the number of requests used
205  unsigned c = 0;
206 
207  // storage for the target distribution
208  Vector<Vector<unsigned>> target_first_row(Nprec);
209  Vector<Vector<unsigned>> target_nrow_local(Nprec);
210 
211  // create storage for the nnz to be sent and received for each
212  // preconditioner
213  Vector<Vector<unsigned>> nnz_send(Nprec);
214  Vector<Vector<unsigned>> nnz_recv(Nprec);
215 
216 
217  /// //////////////////////////////////////////////////////////////////////////
218  /// //////////////////////////////////////////////////////////////////////////
219  /// //////////////////////////////////////////////////////////////////////////
220  /// //////////////////////////////////////////////////////////////////////////
221 
222 
223  // METHOD 0
224  if (Method == 0)
225  {
226  // for every matrix we assemble the duplicate of the matrix on fewer
227  // processors and setup the preconditioner
228  for (unsigned i = 0; i < Nprec; i++)
229  {
 230  // if the matrix is distributed then work out which rows need to be
 231  // sent to/received from the subset of processors that will hold it
232  if (matrix_pt[i]->distributed())
233  {
234  // first compute the distribution of this preconditioner on its subset
235  // of processors
236 
237  // number of rows for this preconditioner
238  unsigned nrow = matrix_pt[i]->nrow();
239 
 240  // set up the target first row and local nrow for this preconditioner
241  target_first_row[i].resize(nproc);
242  target_nrow_local[i].resize(nproc);
243  unsigned nproc_local = Nproc_for_prec[i];
244  for (unsigned p = 0; p < nproc_local; p++)
245  {
246  int pp = First_proc_for_prec[i] + p;
247  target_first_row[i][pp] =
248  unsigned(double(p * nrow) / double(nproc_local));
249  }
250  for (unsigned p = 0; p < nproc_local - 1; p++)
251  {
252  int pp = First_proc_for_prec[i] + p;
253  target_nrow_local[i][pp] =
254  target_first_row[i][pp + 1] - target_first_row[i][pp];
255  }
256  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
257  target_nrow_local[i][last_local_proc] =
258  nrow - target_first_row[i][last_local_proc];
259 
260  // get the details of the current distribution
261  Vector<unsigned> current_first_row(nproc);
262  Vector<unsigned> current_nrow_local(nproc);
263  for (unsigned p = 0; p < nproc; p++)
264  {
265  current_first_row[p] = matrix_pt[i]->first_row(p);
266  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
267  }
268 
269  // resize storage for details of the data to be sent and received
270  First_row_for_proc[i].resize(nproc, 0);
271  Nrow_local_for_proc[i].resize(nproc, 0);
272  First_row_from_proc[i].resize(nproc, 0);
273  Nrow_local_from_proc[i].resize(nproc, 0);
274 
 275  // for every processor compute the first_row and nrow_local that will
 276  // be sent and received by this processor
277  for (unsigned p = 0; p < nproc; p++)
278  {
279  // start with data to be sent
280  if ((target_first_row[i][p] <
281  (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
282  (current_first_row[my_rank] <
283  (target_first_row[i][p] + target_nrow_local[i][p])))
284  {
285  First_row_for_proc[i][p] =
286  std::max(current_first_row[my_rank], target_first_row[i][p]);
287  Nrow_local_for_proc[i][p] =
288  std::min(
289  (current_first_row[my_rank] + current_nrow_local[my_rank]),
290  (target_first_row[i][p] + target_nrow_local[i][p])) -
291  First_row_for_proc[i][p];
292  }
293 
294  // and data to be received
295  if ((target_first_row[i][my_rank] <
296  (current_first_row[p] + current_nrow_local[p])) &&
297  (current_first_row[p] < (target_first_row[i][my_rank] +
298  target_nrow_local[i][my_rank])))
299  {
300  First_row_from_proc[i][p] =
301  std::max(current_first_row[p], target_first_row[i][my_rank]);
302  Nrow_local_from_proc[i][p] =
303  std::min((current_first_row[p] + current_nrow_local[p]),
304  (target_first_row[i][my_rank] +
305  target_nrow_local[i][my_rank])) -
 306  First_row_from_proc[i][p];
 307  }
308  }
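 // (Explanatory note added here; not in the original source.) The two
 // if-blocks above are interval intersections: rows are exchanged with
 // processor p whenever the currently held row range
 // [current_first_row, current_first_row + current_nrow_local) overlaps the
 // target range [target_first_row, target_first_row + target_nrow_local);
 // the max/min pair then extracts exactly the overlapping sub-range.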
309 
310  // resize nnz_send
311  nnz_send[i].resize(nproc);
312 
313  // compute the number of nnzs to be sent
314  // and the number of send and receive requests to be made (nreq)
315  for (unsigned p = 0; p < nproc; p++)
316  {
317  if (Nrow_local_for_proc[i][p] != 0)
318  {
319  int* row_start = matrix_pt[i]->row_start();
320  unsigned k =
321  First_row_for_proc[i][p] - current_first_row[my_rank];
322  nnz_send[i][p] =
323  row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
324  }
325  }
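 // (Explanatory note added here; not in the original source.) Because the
 // matrix is stored in compressed row form, the number of nonzeros in any
 // contiguous block of local rows is simply the difference between the
 // row_start entries at the end and the start of that block, as used above.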
326 
327  // send nnz to be sent to each processor
328  for (unsigned p = 0; p < nproc; p++)
329  {
 330  // don't MPI send to self
331  if (p != my_rank)
332  {
333  // non block send
334  if (Nrow_local_for_proc[i][p] != 0)
335  {
336  // send to other processors
337  int tag = this->compute_tag(nproc, my_rank, p, 0);
338  MPI_Request tr;
339  req.push_back(tr);
340  MPI_Isend(&nnz_send[i][p],
341  1,
342  MPI_UNSIGNED,
343  p,
344  tag,
345  Global_communicator_pt->mpi_comm(),
346  &req[c]);
347  c++;
348  }
349  }
350  }
351 
352  // resize nnz_recv
353  nnz_recv[i].resize(nproc);
354 
355  // receive nnz from other processors
356  for (unsigned pp = 0; pp < nproc; pp++)
357  {
358  // next processor to receive from
359  unsigned p = (nproc + my_rank - pp) % nproc;
360 
 361  // don't MPI receive from self
362  if (p != my_rank)
363  {
364  // blocking recv
365  if (Nrow_local_from_proc[i][p] != 0)
366  {
367  int tag = this->compute_tag(nproc, p, my_rank, 0);
368  MPI_Status stat;
369  unsigned nnz_temp;
370  MPI_Recv(&nnz_temp,
371  1,
372  MPI_UNSIGNED,
373  p,
374  tag,
375  Global_communicator_pt->mpi_comm(),
376  &stat);
377  nnz_recv[i][p] = nnz_temp;
378  }
379  }
380 
381  // receive from self
382  else
383  {
384  nnz_recv[i][p] = nnz_send[i][p];
385  }
386  }
387 
388  // get pointers to the underlying data in the current matrix
389  double* values_send = matrix_pt[i]->value();
390  int* row_start_send = matrix_pt[i]->row_start();
391  int* column_index_send = matrix_pt[i]->column_index();
392 
393  // send and receive the contents of the vector
394  for (unsigned p = 0; p < nproc; p++)
395  {
396  // use mpi methods to send to and receive from all but my rank
397  if (p != my_rank)
398  {
399  // send
400  if (nnz_send[i][p] != 0)
401  {
402  // compute the offset for row_start
403  int offset_n =
404  First_row_for_proc[i][p] - current_first_row[my_rank];
405 
406  // compute the offset for the values and column_index
407  int offset_nnz = row_start_send[offset_n];
408 
409  // values
410  int tag = this->compute_tag(nproc, my_rank, p, 1);
411  MPI_Request tr1;
412  req.push_back(tr1);
413  MPI_Isend(values_send + offset_nnz,
414  int(nnz_send[i][p]),
415  MPI_DOUBLE,
416  p,
417  tag,
418  Global_communicator_pt->mpi_comm(),
419  &req[c]);
420  c++;
421 
422  // column_index
423  tag = this->compute_tag(nproc, my_rank, p, 2);
424  MPI_Request tr2;
425  req.push_back(tr2);
426  MPI_Isend(column_index_send + offset_nnz,
427  int(nnz_send[i][p]),
428  MPI_INT,
429  p,
430  tag,
431  Global_communicator_pt->mpi_comm(),
432  &req[c]);
433  c++;
434 
435  // row_start
436  tag = this->compute_tag(nproc, my_rank, p, 3);
437  MPI_Request tr3;
438  req.push_back(tr3);
439  MPI_Isend(row_start_send + offset_n,
440  int(Nrow_local_for_proc[i][p]),
441  MPI_INT,
442  p,
443  tag,
444  Global_communicator_pt->mpi_comm(),
445  &req[c]);
446  c++;
447  }
448  }
449  }
450  }
451  }
452 
453  // for every matrix we assemble the duplicate of the matrix on fewer
454  // processors and setup the preconditioner
455  for (unsigned i = 0; i < Nprec; i++)
456  {
457  // if the matrix is global (!distributed) then just construct a copy
458  // on the subset of processors
459  if (!matrix_pt[i]->distributed())
460  {
461  oomph_info << "matrix not distributed" << std::endl;
 462  // if this matrix is to be preconditioned by this processor
463  if (i == Color)
464  {
465  // create the local distribution for this matrix
466  LinearAlgebraDistribution* temp_dist_pt =
467  new LinearAlgebraDistribution(
468  Local_communicator_pt, matrix_pt[i]->nrow(), false);
469 
470  // create the corresponding matrix
471  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
472  delete temp_dist_pt; // (dist has now been copied)
473 
474  // get pointers to the underlying data
475  double* values_pt = matrix_pt[i]->value();
476  int* column_index_pt = matrix_pt[i]->column_index();
477  int* row_start_pt = matrix_pt[i]->row_start();
478 
479  // build the matrix without a copy of the data
480  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
481  matrix_pt[i]->nnz(),
482  values_pt,
483  column_index_pt,
484  row_start_pt);
485  }
486  }
487 
488  // else we assemble a copy of the matrix distributed over a subset of
489  // processors
490  else
491  {
492  // number of rows for this preconditioner
493 
494  // if we are assembling the matrix on this processor
495  if (i == Color)
496  {
497  // create the local distribution for this matrix
498  LinearAlgebraDistribution* temp_dist_pt =
499  new LinearAlgebraDistribution(Local_communicator_pt,
500  target_first_row[i][my_rank],
501  target_nrow_local[i][my_rank]);
502 
503  // create the corresponding matrix
504  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
505  delete temp_dist_pt; // (dist has now been copied)
506 
507  // get the number of nnzs to be received from each processor
508 
 509  // total number of nnz to be received
510  unsigned nnz_total = 0;
511  for (unsigned p = 0; p < nproc; p++)
512  {
513  nnz_total += nnz_recv[i][p];
514  }
515 
516  // compute nnz block start
517  Vector<unsigned> nnz_start_proc;
518  Vector<unsigned> nnz_start_index;
519  unsigned row_ptr = target_first_row[i][my_rank];
520  int p = 0;
521  unsigned nnz_ptr = 0;
522  for (p = 0; p < int(nproc); p++)
523  {
524  if (First_row_from_proc[i][p] == row_ptr &&
525  Nrow_local_from_proc[i][p] != 0 && nnz_ptr != nnz_total)
526  {
527  nnz_start_proc.push_back(p);
528  nnz_start_index.push_back(nnz_ptr);
529  nnz_ptr += nnz_recv[i][p];
530  row_ptr += Nrow_local_from_proc[i][p];
531  p = -1;
532  }
533  }
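 // (Explanatory note added here; not in the original source.) The loop above
 // orders the contributing processors by the first row they supply: whenever
 // processor p supplies the rows starting at row_ptr, its nnz block is
 // appended and the scan restarts (p is reset to -1) until all nnz_total
 // entries are accounted for. nnz_start_index[k] is then the offset in the
 // receive buffers at which processor nnz_start_proc[k]'s values and column
 // indices will be placed.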
534 
535  // storage for received data
536  double* values_recv = new double[nnz_total];
537  int* column_index_recv = new int[nnz_total];
538  int* row_start_recv = new int[target_nrow_local[i][my_rank] + 1];
539 
540  // send and receive the contents of the vector
541  for (unsigned pp = 0; pp < nproc; pp++)
542  {
543  // next processor to receive from
544  unsigned p = (nproc + my_rank - pp) % nproc;
545 
546  // use mpi methods to send to and receive from all but my rank
547  if (p != my_rank)
548  {
549  // just receive
550  if (nnz_recv[i][p] != 0)
551  {
552  // compute the offset for row_start
553  int offset_n =
554  First_row_from_proc[i][p] - target_first_row[i][my_rank];
555 
556  // compute the offset for the values and column_index
557  unsigned k = 0;
558  while (nnz_start_proc[k] != p)
559  {
560  k++;
561  }
562  int offset_nnz = nnz_start_index[k];
563 
564  // values
565  int tag = this->compute_tag(nproc, p, my_rank, 1);
566  MPI_Status stat1;
567  MPI_Recv(values_recv + offset_nnz,
568  int(nnz_recv[i][p]),
569  MPI_DOUBLE,
570  p,
571  tag,
572  Global_communicator_pt->mpi_comm(),
573  &stat1);
574 
575  // column_index
576  tag = this->compute_tag(nproc, p, my_rank, 2);
577  MPI_Status stat2;
578  MPI_Recv(column_index_recv + offset_nnz,
579  int(nnz_recv[i][p]),
580  MPI_INT,
581  p,
582  tag,
583  Global_communicator_pt->mpi_comm(),
584  &stat2);
585 
586  // row_start
587  tag = this->compute_tag(nproc, p, my_rank, 3);
588  MPI_Status stat3;
589  MPI_Recv(row_start_recv + offset_n,
590  int(Nrow_local_from_proc[i][p]),
591  MPI_INT,
592  p,
593  tag,
594  Global_communicator_pt->mpi_comm(),
595  &stat3);
596  }
597  }
 598  // otherwise just send to self (i.e. copy)
599  else
600  {
601  if (nnz_recv[i][p] != 0)
602  {
603  // get pointers to the underlying data in the current matrix
604  double* values_send = matrix_pt[i]->value();
605  int* row_start_send = matrix_pt[i]->row_start();
606  int* column_index_send = matrix_pt[i]->column_index();
607 
608  // offset for row_start send to self
609  unsigned offset_n_send =
610  First_row_for_proc[i][my_rank] - matrix_pt[i]->first_row(p);
 611  // offset for values and column_index send to self
612  unsigned offset_nnz_send = row_start_send[offset_n_send];
613 
614  // offset for row_start receive from self
615  unsigned offset_n_recv = First_row_from_proc[i][my_rank] -
616  target_first_row[i][my_rank];
617 
 618  // offset for values and column_index receive from self
619  unsigned k = 0;
620  while (nnz_start_proc[k] != p)
621  {
622  k++;
623  }
624  unsigned offset_nnz_recv = nnz_start_index[k];
625 
626  // and send
627 
628  // values and column_index
629  unsigned n_nnz = nnz_send[i][my_rank];
630  for (unsigned j = 0; j < n_nnz; j++)
631  {
632  values_recv[offset_nnz_recv + j] =
633  values_send[offset_nnz_send + j];
634  column_index_recv[offset_nnz_recv + j] =
635  column_index_send[offset_nnz_send + j];
636  }
637 
638  // row start
639  unsigned n_n = Nrow_local_from_proc[i][my_rank];
640  for (unsigned j = 0; j < n_n; j++)
641  {
642  row_start_recv[offset_n_recv + j] =
643  row_start_send[offset_n_send + j];
644  }
645  }
646  }
647  }
648 
649 
650  // number of processors contributing to the local vector on this
651  // processor
652 
653  // update the row start
654  unsigned nproc_contrib = nnz_start_index.size();
655  for (unsigned j = 0; j < nproc_contrib; j++)
656  {
657  unsigned first = First_row_from_proc[i][nnz_start_proc[j]] -
658  target_first_row[i][my_rank];
659  unsigned last =
660  first + Nrow_local_from_proc[i][nnz_start_proc[j]];
661  unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
662  for (unsigned k = first; k < last; k++)
663  {
664  row_start_recv[k] += nnz_inc;
665  }
666  }
667  row_start_recv[target_nrow_local[i][my_rank]] = int(nnz_total);
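 // (Explanatory note added here; not in the original source.) Each block of
 // row_start entries arrived expressed in the sending processor's local nnz
 // numbering, so the loop above shifts every block by the offset at which
 // that block's nonzeros sit in the receive buffer; setting the final entry
 // to nnz_total then completes a valid compressed-row row_start array.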
668 
669  // build the matrix without a copy of the data
670  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
671  nnz_total,
672  values_recv,
673  column_index_recv,
674  row_start_recv);
675  }
676  }
677  }
678 
679  // wait for all sends to complete
680  if (c != 0)
681  {
682  Vector<MPI_Status> stat(c);
683  MPI_Waitall(c, &req[0], &stat[0]);
684  }
685  }
686 
687 
688  /// //////////////////////////////////////////////////////////////////////////
689  /// //////////////////////////////////////////////////////////////////////////
690  /// //////////////////////////////////////////////////////////////////////////
691  /// //////////////////////////////////////////////////////////////////////////
692 
693 
694  // METHOD 1
695  else if (Method == 1)
696  {
 697  // temporary storage for nnz recv
698  unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
699  for (unsigned j = 0; j < nproc * Nprec; j++)
700  {
701  nnz_recv_temp[j] = 0;
702  }
703 
704  // for every matrix we assemble the duplicate of the matrix on fewer
705  // processors and setup the preconditioner
706  for (unsigned i = 0; i < Nprec; i++)
707  {
708  // if the matrix is global (!distributed) then just construct a copy
709  // on the subset of processors
710  if (!matrix_pt[i]->distributed())
711  {
 712  // if this matrix is to be preconditioned by this processor
713  if (i == Color)
714  {
715  // create the local distribution for this matrix
716  LinearAlgebraDistribution* temp_dist_pt =
717  new LinearAlgebraDistribution(
718  Local_communicator_pt, matrix_pt[i]->nrow(), false);
719 
720  // create the corresponding matrix
721  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
722  delete temp_dist_pt; // (dist has now been copied)
723 
724  // get pointers to the underlying data
725  double* values_pt = matrix_pt[i]->value();
726  int* column_index_pt = matrix_pt[i]->column_index();
727  int* row_start_pt = matrix_pt[i]->row_start();
728 
729  // build the matrix without a copy of the data
730  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
731  matrix_pt[i]->nnz(),
732  values_pt,
733  column_index_pt,
734  row_start_pt);
735  }
736  }
737 
 738  // otherwise the matrix is distributed, so assemble a copy of it
 739  // distributed over the subset of processors
740  else
741  {
742  // first compute the distribution of this preconditioner on its subset
743  // of processors
744 
745  // number of rows for this preconditioner
746  unsigned nrow = matrix_pt[i]->nrow();
747 
 748  // set up the target first row and local nrow for this preconditioner
749  target_first_row[i].resize(nproc);
750  target_nrow_local[i].resize(nproc);
751  unsigned nproc_local = Nproc_for_prec[i];
752  for (unsigned p = 0; p < nproc_local; p++)
753  {
754  int pp = First_proc_for_prec[i] + p;
755  target_first_row[i][pp] =
756  unsigned(double(p * nrow) / double(nproc_local));
757  }
758  for (unsigned p = 0; p < nproc_local - 1; p++)
759  {
760  int pp = First_proc_for_prec[i] + p;
761  target_nrow_local[i][pp] =
762  target_first_row[i][pp + 1] - target_first_row[i][pp];
763  }
764  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
765  target_nrow_local[i][last_local_proc] =
766  nrow - target_first_row[i][last_local_proc];
767 
768  // get the details of the current distribution
769  Vector<unsigned> current_first_row(nproc);
770  Vector<unsigned> current_nrow_local(nproc);
771  for (unsigned p = 0; p < nproc; p++)
772  {
773  current_first_row[p] = matrix_pt[i]->first_row(p);
774  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
775  }
776 
777  // resize storage for details of the data to be sent and received
778  First_row_for_proc[i].resize(nproc, 0);
779  Nrow_local_for_proc[i].resize(nproc, 0);
780  First_row_from_proc[i].resize(nproc, 0);
781  Nrow_local_from_proc[i].resize(nproc, 0);
782 
 783  // for every processor compute the first_row and nrow_local that will
 784  // be sent and received by this processor
785  for (unsigned p = 0; p < nproc; p++)
786  {
787  // start with data to be sent
788  if ((target_first_row[i][p] <
789  (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
790  (current_first_row[my_rank] <
791  (target_first_row[i][p] + target_nrow_local[i][p])))
792  {
793  First_row_for_proc[i][p] =
794  std::max(current_first_row[my_rank], target_first_row[i][p]);
795  Nrow_local_for_proc[i][p] =
796  std::min(
797  (current_first_row[my_rank] + current_nrow_local[my_rank]),
798  (target_first_row[i][p] + target_nrow_local[i][p])) -
799  First_row_for_proc[i][p];
800  }
801 
802  // and data to be received
803  if ((target_first_row[i][my_rank] <
804  (current_first_row[p] + current_nrow_local[p])) &&
805  (current_first_row[p] < (target_first_row[i][my_rank] +
806  target_nrow_local[i][my_rank])))
807  {
808  First_row_from_proc[i][p] =
809  std::max(current_first_row[p], target_first_row[i][my_rank]);
810  Nrow_local_from_proc[i][p] =
811  std::min((current_first_row[p] + current_nrow_local[p]),
812  (target_first_row[i][my_rank] +
813  target_nrow_local[i][my_rank])) -
 814  First_row_from_proc[i][p];
 815  }
816  }
817 
818  // resize nnz_send
819  nnz_send[i].resize(nproc);
820 
821  // compute the number of nnzs to be sent
822  // and the number of send and receive requests to be made (nreq)
823  for (unsigned p = 0; p < nproc; p++)
824  {
825  if (Nrow_local_for_proc[i][p] != 0)
826  {
827  int* row_start = matrix_pt[i]->row_start();
828  unsigned k =
829  First_row_for_proc[i][p] - current_first_row[my_rank];
830  nnz_send[i][p] =
831  row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
832  }
833  }
834 
835  // resize nnz_recv
836  nnz_recv[i].resize(nproc);
837 
838  // send nnz to be sent to each processor
839  for (unsigned p = 0; p < nproc; p++)
840  {
841  // send and recv
842 
 843  // don't MPI send to self
844  if (p != my_rank)
845  {
846  // non block send
847  if (Nrow_local_for_proc[i][p] != 0)
848  {
849  // send to other processors
850  int tag = this->compute_tag(nproc, my_rank, p, 0);
851  MPI_Request tr;
852  req.push_back(tr);
853  MPI_Isend(&nnz_send[i][p],
854  1,
855  MPI_UNSIGNED,
856  p,
857  tag,
858  Global_communicator_pt->mpi_comm(),
859  &req[c]);
860  c++;
861  }
862 
863  // non blocking recv
864  if (Nrow_local_from_proc[i][p] != 0)
865  {
866  int tag = this->compute_tag(nproc, p, my_rank, 0);
867  MPI_Request tr;
868  req.push_back(tr);
869  MPI_Irecv(nnz_recv_temp + (i * nproc) + p,
870  1,
871  MPI_UNSIGNED,
872  p,
873  tag,
874  Global_communicator_pt->mpi_comm(),
875  &req[c]);
876  c++;
877  }
878  }
879  // receive from self
880  else
881  {
882  if (Nrow_local_for_proc[i][p] != 0)
883  {
884  nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
885  }
886  }
887  }
888  }
889  }
890  if (c != 0)
891  {
892  Vector<MPI_Status> stat(c);
893  MPI_Waitall(c, &req[0], &stat[0]);
894  req.clear();
895  stat.clear();
896  }
897  c = 0;
898  for (unsigned i = 0; i < Nprec; i++)
899  {
900  for (unsigned p = 0; p < nproc; p++)
901  {
902  nnz_recv[i][p] = nnz_recv_temp[(i * nproc) + p];
903  }
904  }
 905  delete[] nnz_recv_temp;
906 
907  // get the number of nnzs to be received from each processor
908 
 909  // total number of nnz to be received
910  unsigned nnz_total = 0;
911  for (unsigned p = 0; p < nproc; p++)
912  {
913  nnz_total += nnz_recv[Color][p];
914  }
915 
916  // compute nnz block start
917  Vector<unsigned> nnz_start_proc;
918  Vector<unsigned> nnz_start_index;
919  unsigned row_ptr = target_first_row[Color][my_rank];
920  int p = 0;
921  unsigned nnz_ptr = 0;
922  for (p = 0; p < int(nproc); p++)
923  {
924  if (First_row_from_proc[Color][p] == row_ptr &&
925  Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
926  {
927  nnz_start_proc.push_back(p);
928  nnz_start_index.push_back(nnz_ptr);
929  nnz_ptr += nnz_recv[Color][p];
930  row_ptr += Nrow_local_from_proc[Color][p];
931  p = -1;
932  }
933  }
934 
935  // storage for derived datatypes
936  Vector<MPI_Datatype> datatypes;
937 
938  // storage for received data
939  double* values_recv = new double[nnz_total];
940  int* column_index_recv = new int[nnz_total];
941  int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
942 
943  /// ////////////////////////////////////////////////////////////////////////
944  // SEND
945  /// ////////////////////////////////////////////////////////////////////////
946  unsigned c_send = 0;
947  Vector<MPI_Request> send_req;
948 
949  // for every matrix we assemble the duplicate of the matrix on fewer
950  // processors and setup the preconditioner
951  for (unsigned i = 0; i < Nprec; i++)
952  {
953  // get pointers to the underlying data in the current matrix
954  double* values_send = matrix_pt[i]->value();
955  int* row_start_send = matrix_pt[i]->row_start();
956  int* column_index_send = matrix_pt[i]->column_index();
957 
958  // send and receive the contents of the vector
959  for (unsigned p = 0; p < nproc; p++)
960  {
961  // use mpi methods to send to and receive from all but my rank
962  if (p != my_rank)
963  {
964  // send
965  if (nnz_send[i][p] != 0)
966  {
967  // create 3 MPI contiguous datatypes
968  // + values
969  // + column_index
970  // + row_start
971 
972  // values
973  MPI_Datatype datatype_values;
974  MPI_Type_contiguous(
975  int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
976  MPI_Type_commit(&datatype_values);
977  datatypes.push_back(datatype_values);
978 
979  // column index
980  MPI_Datatype datatype_column_index;
981  MPI_Type_contiguous(
982  int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
983  MPI_Type_commit(&datatype_column_index);
984  datatypes.push_back(datatype_column_index);
985 
986  // row start
987  MPI_Datatype datatype_row_start;
988  MPI_Type_contiguous(
989  int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
990  MPI_Type_commit(&datatype_row_start);
991  datatypes.push_back(datatype_row_start);
992 
993  // assemble the typelist
994  MPI_Datatype typelist[3];
995  typelist[0] = datatype_values;
996  typelist[1] = datatype_column_index;
997  typelist[2] = datatype_row_start;
998 
999  // compute the offset for row_start
1000  int offset_n =
1001  First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
1002 
1003  // compute the offset for the values and column_index
1004  int offset_nnz = row_start_send[offset_n];
1005 
1006  // next compute the displacements
1007  MPI_Aint displacements[3];
1008  MPI_Get_address(values_send + offset_nnz, &displacements[0]);
1009  MPI_Get_address(column_index_send + offset_nnz,
1010  &displacements[1]);
1011  MPI_Get_address(row_start_send + offset_n, &displacements[2]);
1012  for (int j = 2; j >= 0; j--)
1013  {
1014  displacements[j] -= displacements[0];
1015  }
1016 
1017  // set the block lengths
1018  int block_length[3];
1019  block_length[0] = block_length[1] = block_length[2] = 1;
1020 
1021  // now build the final datatype
1022  MPI_Datatype send_type;
1023  MPI_Type_create_struct(
1024  3, block_length, displacements, typelist, &send_type);
1025  MPI_Type_commit(&send_type);
1026  datatypes.push_back(send_type);
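 // (Explanatory note added here; not in the original source.) The struct
 // datatype bundles the three buffers (values, column_index, row_start) into
 // a single message: the displacements are measured relative to the first
 // buffer, so a single MPI_Isend with count 1 and base address
 // values_send + offset_nnz transfers all three arrays in one call.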
1027 
1028  // send
1029  int tag = this->compute_tag(nproc, my_rank, p, 1);
1030  MPI_Request tr1;
1031  send_req.push_back(tr1);
1032  MPI_Isend(values_send + offset_nnz,
1033  1,
1034  send_type,
1035  p,
1036  tag,
1037  Global_communicator_pt->mpi_comm(),
1038  &send_req[c_send]);
1039  c_send++;
1040  }
1041  }
1042  }
1043  }
1044 
1045  /// ////////////////////////////////////////////////////////////////////////
1046  // RECV
1047  /// ////////////////////////////////////////////////////////////////////////
1048  unsigned c_recv = 0;
1049  Vector<MPI_Request> recv_req;
1050 
1051  // receive the contents of the vector
1052  for (unsigned p = 0; p < nproc; p++)
1053  {
1054  // use mpi methods to send to and receive from all but my rank
1055  if (p != my_rank)
1056  {
1057  // just receive
1058  if (nnz_recv[Color][p] != 0)
1059  {
1060  // create 3 MPI contiguous datatypes
1061  // + values
1062  // + column_index
1063  // + row_start
1064 
1065  // values
1066  MPI_Datatype datatype_values;
1067  MPI_Type_contiguous(
1068  int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
1069  MPI_Type_commit(&datatype_values);
1070  datatypes.push_back(datatype_values);
1071 
1072  // column index
1073  MPI_Datatype datatype_column_index;
1074  MPI_Type_contiguous(
1075  int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
1076  MPI_Type_commit(&datatype_column_index);
1077  datatypes.push_back(datatype_column_index);
1078 
1079  // row start
1080  MPI_Datatype datatype_row_start;
1081  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
1082  MPI_INT,
1083  &datatype_row_start);
1084  MPI_Type_commit(&datatype_row_start);
1085  datatypes.push_back(datatype_row_start);
1086 
1087  // assemble the typelist
1088  MPI_Datatype typelist[3];
1089  typelist[0] = datatype_values;
1090  typelist[1] = datatype_column_index;
1091  typelist[2] = datatype_row_start;
1092 
1093  // compute the offset for row_start
1094  int offset_n =
1095  First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
1096 
1097  // compute the offset for the values and column_index
1098  unsigned k = 0;
1099  while (nnz_start_proc[k] != p)
1100  {
1101  k++;
1102  }
1103  int offset_nnz = nnz_start_index[k];
1104 
1105  // next compute the displacements
1106  MPI_Aint displacements[3];
1107  MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
1108  MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
1109  MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
1110  for (int j = 2; j >= 0; j--)
1111  {
1112  displacements[j] -= displacements[0];
1113  }
1114 
1115  // set the block lengths
1116  int block_length[3];
1117  block_length[0] = block_length[1] = block_length[2] = 1;
1118 
1119  // now build the final datatype
1120  MPI_Datatype recv_type;
1121  MPI_Type_create_struct(
1122  3, block_length, displacements, typelist, &recv_type);
1123  MPI_Type_commit(&recv_type);
1124  datatypes.push_back(recv_type);
1125 
1126  // recv
1127  int tag = this->compute_tag(nproc, p, my_rank, 1);
1128  MPI_Request tr1;
1129  recv_req.push_back(tr1);
1130  MPI_Irecv(values_recv + offset_nnz,
1131  1,
1132  recv_type,
1133  p,
1134  tag,
1135  Global_communicator_pt->mpi_comm(),
1136  &recv_req[c_recv]);
1137  c_recv++;
1138  }
1139  }
1140  }
1141 
1142  // otherwise send to self (copy)
1143  if (nnz_recv[Color][my_rank] != 0)
1144  {
1145  // get pointers to the underlying data in the current matrix
1146  double* values_send = matrix_pt[Color]->value();
1147  int* row_start_send = matrix_pt[Color]->row_start();
1148  int* column_index_send = matrix_pt[Color]->column_index();
1149 
1150  // offset for row_start send to self
1151  unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
1152  matrix_pt[Color]->first_row(my_rank);
1153 
 1154  // offset for values and column_index send to self
1155  unsigned offset_nnz_send = row_start_send[offset_n_send];
1156 
1157  // offset for row_start receive from self
1158  unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
1159  target_first_row[Color][my_rank];
1160 
 1161  // offset for values and column_index receive from self
1162  unsigned k = 0;
1163  while (nnz_start_proc[k] != my_rank)
1164  {
1165  k++;
1166  }
1167  unsigned offset_nnz_recv = nnz_start_index[k];
1168 
1169  // and send
1170 
1171  // values and column_index
1172  unsigned n_nnz = nnz_send[Color][my_rank];
1173  for (unsigned j = 0; j < n_nnz; j++)
1174  {
1175  values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
1176  column_index_recv[offset_nnz_recv + j] =
1177  column_index_send[offset_nnz_send + j];
1178  }
1179 
1180  // row start
1181  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1182  for (unsigned j = 0; j < n_n; j++)
1183  {
1184  row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
1185  }
1186  }
1187 
1188  // create the local distribution for this matrix
1189  LinearAlgebraDistribution* temp_dist_pt =
1190  new LinearAlgebraDistribution(Local_communicator_pt,
1191  target_first_row[Color][my_rank],
1192  target_nrow_local[Color][my_rank]);
1193 
1194  // create the corresponding matrix
1195  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1196  delete temp_dist_pt; // (dist has now been copied)
1197 
1198  /// ////////////////////////////////////////////////////////////////////////
1199  // and WAIT...
1200  /// ////////////////////////////////////////////////////////////////////////
1201  if (c_recv != 0)
1202  {
1203  Vector<MPI_Status> recv_stat(c_recv);
1204  MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
1205  recv_req.clear();
1206  recv_stat.clear();
1207  }
1208 
1209  // build the matrix
1210 
1211  // update the row start
1212  unsigned nproc_contrib = nnz_start_index.size();
1213  for (unsigned j = 0; j < nproc_contrib; j++)
1214  {
1215  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1216  target_first_row[Color][my_rank];
1217  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1218  unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
1219  for (unsigned k = first; k < last; k++)
1220  {
1221  row_start_recv[k] += nnz_inc;
1222  }
1223  }
1224  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1225 
1226  // build the matrix without a copy of the data
1227  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1228  nnz_total,
1229  values_recv,
1230  column_index_recv,
1231  row_start_recv);
1232 
1233  // and finally wait for the sends
 1234  if (c_send != 0)
 1235  {
 1236  Vector<MPI_Status> send_stat(c_send);
1237  MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
1238  send_req.clear();
1239  send_stat.clear();
1240  }
1241 
1242  // and clear the datatype
1243  unsigned ndatatypes = datatypes.size();
1244  for (unsigned i = 0; i < ndatatypes; i++)
1245  {
1246  MPI_Type_free(&datatypes[i]);
1247  }
1248  }
1249 
1250 
1251  /// //////////////////////////////////////////////////////////////////////////
1252  /// //////////////////////////////////////////////////////////////////////////
1253  /// //////////////////////////////////////////////////////////////////////////
1254  /// //////////////////////////////////////////////////////////////////////////
1255 
1256 
1257  // METHOD 2
1258  else if (Method == 2)
1259  {
 1260  // temporary storage for nnz recv
1261  unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
1262  for (unsigned j = 0; j < nproc * Nprec; j++)
1263  {
1264  nnz_recv_temp[j] = 0;
1265  }
1266 
1267  // for every matrix we assemble the duplicate of the matrix on fewer
1268  // processors and setup the preconditioner
1269  for (unsigned i = 0; i < Nprec; i++)
1270  {
1271  // if the matrix is global (!distributed) then just construct a copy
1272  // on the subset of processors
1273  if (!matrix_pt[i]->distributed())
1274  {
 1275  // if this matrix is to be preconditioned by this processor
1276  if (i == Color)
1277  {
1278  // create the local distribution for this matrix
1279  LinearAlgebraDistribution* temp_dist_pt =
1280  new LinearAlgebraDistribution(
1281  Local_communicator_pt, matrix_pt[i]->nrow(), false);
1282 
1283  // create the corresponding matrix
1284  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1285  delete temp_dist_pt; // (dist has now been copied)
1286 
1287  // get pointers to the underlying data
1288  double* values_pt = matrix_pt[i]->value();
1289  int* column_index_pt = matrix_pt[i]->column_index();
1290  int* row_start_pt = matrix_pt[i]->row_start();
1291 
1292  // build the matrix without a copy of the data
1293  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1294  matrix_pt[i]->nnz(),
1295  values_pt,
1296  column_index_pt,
1297  row_start_pt);
1298  }
1299  }
1300 
 1301  // otherwise the matrix is distributed, so assemble a copy of it
 1302  // distributed over the subset of processors
1303  else
1304  {
1305  // first compute the distribution of this preconditioner on its subset
1306  // of processors
1307 
1308  // number of rows for this preconditioner
1309  unsigned nrow = matrix_pt[i]->nrow();
1310 
 1311  // set up the target first row and local nrow for this preconditioner
1312  target_first_row[i].resize(nproc);
1313  target_nrow_local[i].resize(nproc);
1314  unsigned nproc_local = Nproc_for_prec[i];
1315  for (unsigned p = 0; p < nproc_local; p++)
1316  {
1317  int pp = First_proc_for_prec[i] + p;
1318  target_first_row[i][pp] =
1319  unsigned(double(p * nrow) / double(nproc_local));
1320  }
1321  for (unsigned p = 0; p < nproc_local - 1; p++)
1322  {
1323  int pp = First_proc_for_prec[i] + p;
1324  target_nrow_local[i][pp] =
1325  target_first_row[i][pp + 1] - target_first_row[i][pp];
1326  }
1327  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1328  target_nrow_local[i][last_local_proc] =
1329  nrow - target_first_row[i][last_local_proc];
1330 
1331  // get the details of the current distribution
1332  Vector<unsigned> current_first_row(nproc);
1333  Vector<unsigned> current_nrow_local(nproc);
1334  for (unsigned p = 0; p < nproc; p++)
1335  {
1336  current_first_row[p] = matrix_pt[i]->first_row(p);
1337  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1338  }
1339 
1340  // resize storage for details of the data to be sent and received
1341  First_row_for_proc[i].resize(nproc, 0);
1342  Nrow_local_for_proc[i].resize(nproc, 0);
1343  First_row_from_proc[i].resize(nproc, 0);
1344  Nrow_local_from_proc[i].resize(nproc, 0);
1345 
 1346  // for every processor compute the first_row and nrow_local that will
 1347  // be sent and received by this processor
1348  for (unsigned p = 0; p < nproc; p++)
1349  {
1350  // start with data to be sent
1351  if ((target_first_row[i][p] <
1352  (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
1353  (current_first_row[my_rank] <
1354  (target_first_row[i][p] + target_nrow_local[i][p])))
1355  {
1356  First_row_for_proc[i][p] =
1357  std::max(current_first_row[my_rank], target_first_row[i][p]);
1358  Nrow_local_for_proc[i][p] =
1359  std::min(
1360  (current_first_row[my_rank] + current_nrow_local[my_rank]),
1361  (target_first_row[i][p] + target_nrow_local[i][p])) -
1362  First_row_for_proc[i][p];
1363  }
1364 
1365  // and data to be received
1366  if ((target_first_row[i][my_rank] <
1367  (current_first_row[p] + current_nrow_local[p])) &&
1368  (current_first_row[p] < (target_first_row[i][my_rank] +
1369  target_nrow_local[i][my_rank])))
1370  {
1371  First_row_from_proc[i][p] =
1372  std::max(current_first_row[p], target_first_row[i][my_rank]);
1373  Nrow_local_from_proc[i][p] =
1374  std::min((current_first_row[p] + current_nrow_local[p]),
1375  (target_first_row[i][my_rank] +
1376  target_nrow_local[i][my_rank])) -
1377  First_row_from_proc[i][p];
1378  }
1379  }
1380 
1381  // resize nnz_send
1382  nnz_send[i].resize(nproc);
1383 
1384  // compute the number of nnzs to be sent
1385  // and the number of send and receive requests to be made (nreq)
1386  for (unsigned p = 0; p < nproc; p++)
1387  {
1388  if (Nrow_local_for_proc[i][p] != 0)
1389  {
1390  int* row_start = matrix_pt[i]->row_start();
1391  unsigned k =
1392  First_row_for_proc[i][p] - current_first_row[my_rank];
1393  nnz_send[i][p] =
1394  row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
1395  }
1396  }
1397 
1398  // resize nnz_recv
1399  nnz_recv[i].resize(nproc);
1400 
1401  // send nnz to be sent to each processor
1402  for (unsigned p = 0; p < nproc; p++)
1403  {
1404  // send and recv
1405 
 1406  // don't MPI send to self
1407  if (p != my_rank)
1408  {
1409  // non block send
1410  if (Nrow_local_for_proc[i][p] != 0)
1411  {
1412  // send to other processors
1413  int tag = this->compute_tag(nproc, my_rank, p, 0);
1414  MPI_Request tr;
1415  req.push_back(tr);
1416  MPI_Isend(&nnz_send[i][p],
1417  1,
1418  MPI_UNSIGNED,
1419  p,
1420  tag,
1421  Global_communicator_pt->mpi_comm(),
1422  &req[c]);
1423  c++;
1424  }
1425 
1426  // non blocking recv
1427  if (Nrow_local_from_proc[i][p] != 0)
1428  {
1429  int tag = this->compute_tag(nproc, p, my_rank, 0);
1430  MPI_Request tr;
1431  req.push_back(tr);
1432  MPI_Irecv(nnz_recv_temp + (i * nproc) + p,
1433  1,
1434  MPI_UNSIGNED,
1435  p,
1436  tag,
1437  Global_communicator_pt->mpi_comm(),
1438  &req[c]);
1439  c++;
1440  }
1441  }
1442  // receive from self
1443  else
1444  {
1445  if (Nrow_local_for_proc[i][p] != 0)
1446  {
1447  nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
1448  }
1449  }
1450  }
1451  }
1452  }
1453  if (c != 0)
1454  {
1455  Vector<MPI_Status> stat(c);
1456  MPI_Waitall(c, &req[0], &stat[0]);
1457  req.clear();
1458  stat.clear();
1459  c = 0;
1460  }
1461  for (unsigned i = 0; i < Nprec; i++)
1462  {
1463  for (unsigned p = 0; p < nproc; p++)
1464  {
1465  nnz_recv[i][p] = nnz_recv_temp[(i * nproc) + p];
1466  }
1467  }
 1468  delete[] nnz_recv_temp;
1469 
1470  // get the number of nnzs to be received from each processor
1471 
 1472  // total number of nnz to be received
1473  unsigned nnz_total = 0;
1474  for (unsigned p = 0; p < nproc; p++)
1475  {
1476  nnz_total += nnz_recv[Color][p];
1477  }
1478 
1479  // compute nnz block start
1480  Vector<unsigned> nnz_start_proc;
1481  Vector<unsigned> nnz_start_index;
1482  unsigned row_ptr = target_first_row[Color][my_rank];
1483  int p = 0;
1484  unsigned nnz_ptr = 0;
1485  for (p = 0; p < int(nproc); p++)
1486  {
1487  if (First_row_from_proc[Color][p] == row_ptr &&
1488  Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
1489  {
1490  nnz_start_proc.push_back(p);
1491  nnz_start_index.push_back(nnz_ptr);
1492  nnz_ptr += nnz_recv[Color][p];
1493  row_ptr += Nrow_local_from_proc[Color][p];
1494  p = -1;
1495  }
1496  }
1497 
1498  // storage for derived datatypes
1499  Vector<MPI_Datatype> datatypes;
1500 
1501  // storage for received data
1502  double* values_recv = new double[nnz_total];
1503  int* column_index_recv = new int[nnz_total];
1504  int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
1505 
1506  /// ////////////////////////////////////////////////////////////////////////
1507  // RECV
1508  /// ////////////////////////////////////////////////////////////////////////
1509  unsigned c_recv = 0;
1510  Vector<MPI_Request> recv_req;
1511 
1512  // receive the contents of the vector
1513  for (unsigned p = 0; p < nproc; p++)
1514  {
1515  // use mpi methods to send to and receive from all but my rank
1516  if (p != my_rank)
1517  {
1518  // just receive
1519  if (nnz_recv[Color][p] != 0)
1520  {
1521  // create 3 MPI contiguous datatypes
1522  // + values
1523  // + column_index
1524  // + row_start
1525 
1526  // values
1527  MPI_Datatype datatype_values;
1528  MPI_Type_contiguous(
1529  int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
1530  MPI_Type_commit(&datatype_values);
1531  datatypes.push_back(datatype_values);
1532 
1533  // column index
1534  MPI_Datatype datatype_column_index;
1535  MPI_Type_contiguous(
1536  int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
1537  MPI_Type_commit(&datatype_column_index);
1538  datatypes.push_back(datatype_column_index);
1539 
1540  // row start
1541  MPI_Datatype datatype_row_start;
1542  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
1543  MPI_INT,
1544  &datatype_row_start);
1545  MPI_Type_commit(&datatype_row_start);
1546  datatypes.push_back(datatype_row_start);
1547 
1548  // assemble the typelist
1549  MPI_Datatype typelist[3];
1550  typelist[0] = datatype_values;
1551  typelist[1] = datatype_column_index;
1552  typelist[2] = datatype_row_start;
1553 
1554  // compute the offset for row_start
1555  int offset_n =
1556  First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
1557 
1558  // compute the offset for the values and column_index
1559  unsigned k = 0;
1560  while (nnz_start_proc[k] != p)
1561  {
1562  k++;
1563  }
1564  int offset_nnz = nnz_start_index[k];
1565 
1566  // next compute the displacements
1567  MPI_Aint displacements[3];
1568  MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
1569  MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
1570  MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
1571  for (int j = 2; j >= 0; j--)
1572  {
1573  displacements[j] -= displacements[0];
1574  }
1575 
1576  // set the block lengths
1577  int block_length[3];
1578  block_length[0] = block_length[1] = block_length[2] = 1;
1579 
1580  // now build the final datatype
1581  MPI_Datatype recv_type;
1582  MPI_Type_create_struct(
1583  3, block_length, displacements, typelist, &recv_type);
1584  MPI_Type_commit(&recv_type);
1585  datatypes.push_back(recv_type);
1586 
1587  // recv
1588  int tag = this->compute_tag(nproc, p, my_rank, 1);
1589  MPI_Request tr1;
1590  recv_req.push_back(tr1);
1591  MPI_Irecv(values_recv + offset_nnz,
1592  1,
1593  recv_type,
1594  p,
1595  tag,
1596  Global_communicator_pt->mpi_comm(),
1597  &recv_req[c_recv]);
1598  c_recv++;
1599  }
1600  }
1601  }
1602 
1603  /// ////////////////////////////////////////////////////////////////////////
1604  // SEND
1605  /// ////////////////////////////////////////////////////////////////////////
1606  unsigned c_send = 0;
1607  Vector<MPI_Request> send_req;
1608 
1609  // for every matrix we assemble the duplicate of the matrix on fewer
1610  // processors and setup the preconditioner
1611  for (unsigned i = 0; i < Nprec; i++)
1612  {
1613  // get pointers to the underlying data in the current matrix
1614  double* values_send = matrix_pt[i]->value();
1615  int* row_start_send = matrix_pt[i]->row_start();
1616  int* column_index_send = matrix_pt[i]->column_index();
1617 
1618  // send and receive the contents of the vector
1619  for (unsigned p = 0; p < nproc; p++)
1620  {
1621  // use mpi methods to send to and receive from all but my rank
1622  if (p != my_rank)
1623  {
1624  // send
1625  if (nnz_send[i][p] != 0)
1626  {
1627  // create 3 MPI contiguous datatypes
1628  // + values
1629  // + column_index
1630  // + row_start
1631 
1632  // values
1633  MPI_Datatype datatype_values;
1634  MPI_Type_contiguous(
1635  int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
1636  MPI_Type_commit(&datatype_values);
1637  datatypes.push_back(datatype_values);
1638 
1639  // column index
1640  MPI_Datatype datatype_column_index;
1641  MPI_Type_contiguous(
1642  int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
1643  MPI_Type_commit(&datatype_column_index);
1644  datatypes.push_back(datatype_column_index);
1645 
1646  // row start
1647  MPI_Datatype datatype_row_start;
1648  MPI_Type_contiguous(
1649  int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
1650  MPI_Type_commit(&datatype_row_start);
1651  datatypes.push_back(datatype_row_start);
1652 
1653  // assemble the typelist
1654  MPI_Datatype typelist[3];
1655  typelist[0] = datatype_values;
1656  typelist[1] = datatype_column_index;
1657  typelist[2] = datatype_row_start;
1658 
1659  // compute the offset for row_start
1660  int offset_n =
1661  First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
1662 
1663  // compute the offset for the values and column_index
1664  int offset_nnz = row_start_send[offset_n];
1665 
1666  // next compute the displacements
1667  MPI_Aint displacements[3];
1668  MPI_Get_address(values_send + offset_nnz, &displacements[0]);
1669  MPI_Get_address(column_index_send + offset_nnz,
1670  &displacements[1]);
1671  MPI_Get_address(row_start_send + offset_n, &displacements[2]);
1672  for (int j = 2; j >= 0; j--)
1673  {
1674  displacements[j] -= displacements[0];
1675  }
1676 
1677  // set the block lengths
1678  int block_length[3];
1679  block_length[0] = block_length[1] = block_length[2] = 1;
1680 
1681  // now build the final datatype
1682  MPI_Datatype send_type;
1683  MPI_Type_create_struct(
1684  3, block_length, displacements, typelist, &send_type);
1685  MPI_Type_commit(&send_type);
1686  datatypes.push_back(send_type);
1687 
1688  // send
1689  int tag = this->compute_tag(nproc, my_rank, p, 1);
1690  MPI_Request tr1;
1691  send_req.push_back(tr1);
1692  MPI_Isend(values_send + offset_nnz,
1693  1,
1694  send_type,
1695  p,
1696  tag,
1697  Global_communicator_pt->mpi_comm(),
1698  &send_req[c_send]);
1699  c_send++;
1700  }
1701  }
1702  }
1703  }
1704 
1705  // otherwise send to self (copy)
1706  if (nnz_recv[Color][my_rank] != 0)
1707  {
1708  // get pointers to the underlying data in the current matrix
1709  double* values_send = matrix_pt[Color]->value();
1710  int* row_start_send = matrix_pt[Color]->row_start();
1711  int* column_index_send = matrix_pt[Color]->column_index();
1712 
1713  // offset for row_start send to self
1714  unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
1715  matrix_pt[Color]->first_row(my_rank);
1716 
 1717  // offset for values and column_index send to self
1718  unsigned offset_nnz_send = row_start_send[offset_n_send];
1719 
1720  // offset for row_start receive from self
1721  unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
1722  target_first_row[Color][my_rank];
1723 
 1724  // offset for values and column_index receive from self
1725  unsigned k = 0;
1726  while (nnz_start_proc[k] != my_rank)
1727  {
1728  k++;
1729  }
1730  unsigned offset_nnz_recv = nnz_start_index[k];
1731 
1732  // and send
1733 
1734  // values and column_index
1735  unsigned n_nnz = nnz_send[Color][my_rank];
1736  for (unsigned j = 0; j < n_nnz; j++)
1737  {
1738  values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
1739  column_index_recv[offset_nnz_recv + j] =
1740  column_index_send[offset_nnz_send + j];
1741  }
1742 
1743  // row start
1744  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1745  for (unsigned j = 0; j < n_n; j++)
1746  {
1747  row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
1748  }
1749  }
1750 
1751  // create the local distribution for this matrix
1752  LinearAlgebraDistribution* temp_dist_pt =
1753  new LinearAlgebraDistribution(Local_communicator_pt,
1754  target_first_row[Color][my_rank],
1755  target_nrow_local[Color][my_rank]);
1756 
1757  // create the corresponding matrix
1758  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1759  delete temp_dist_pt; // (dist has now been copied)
1760 
1761  /// ////////////////////////////////////////////////////////////////////////
1762  // and WAIT...
1763  /// ////////////////////////////////////////////////////////////////////////
1764  if (c_recv != 0)
1765  {
1766  Vector<MPI_Status> recv_stat(c_recv);
1767  MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
1768  recv_req.clear();
1769  recv_stat.clear();
1770  }
1771 
1772  // build the matrix
1773 
1774  // update the row start
1775  unsigned nproc_contrib = nnz_start_index.size();
1776  for (unsigned j = 0; j < nproc_contrib; j++)
1777  {
1778  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1779  target_first_row[Color][my_rank];
1780  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1781  unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
1782  for (unsigned k = first; k < last; k++)
1783  {
1784  row_start_recv[k] += nnz_inc;
1785  }
1786  }
1787  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1788 
1789  // build the matrix without a copy of the data
1790  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1791  nnz_total,
1792  values_recv,
1793  column_index_recv,
1794  row_start_recv);
1795 
1796  // and finally wait for the sends
1797  if (c_send != 0)
1798  {
1799  Vector<MPI_Status> send_stat(c_send);
1800  MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
1801  send_req.clear();
1802  send_stat.clear();
1803  }
1804 
1805  // and clear the datatype
1806  unsigned ndatatypes = datatypes.size();
1807  for (unsigned i = 0; i < ndatatypes; i++)
1808  {
1809  MPI_Type_free(&datatypes[i]);
1810  }
1811  }
1812 
1813 
1814  /// //////////////////////////////////////////////////////////////////////////
1815  /// //////////////////////////////////////////////////////////////////////////
1816  /// //////////////////////////////////////////////////////////////////////////
1817  /// //////////////////////////////////////////////////////////////////////////
1818 
1819 
1820  // METHOD 3
1821  else if (Method == 3)
1822  {
1823  // temporary storage for nnz recv
1824  unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
1825  for (unsigned j = 0; j < nproc * Nprec; j++)
1826  {
1827  nnz_recv_temp[j] = 0;
1828  }
1829 
1830  // for every matrix we assemble the duplicate of the matrix on fewer
1831  // processors and setup the preconditioner
1832  for (unsigned i = 0; i < Nprec; i++)
1833  {
1834  // if the matrix is global (!distributed) then just construct a copy
1835  // on the subset of processors
1836  if (!matrix_pt[i]->distributed())
1837  {
1838  // if this matrix is to be preconditioned by this processor
1839  if (i == Color)
1840  {
1841  // create the local distribution for this matrix
1842  LinearAlgebraDistribution* temp_dist_pt =
1843  new LinearAlgebraDistribution(
1844  Local_communicator_pt, matrix_pt[i]->nrow(), false);
1845 
1846  // create the corresponding matrix
1847  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1848  delete temp_dist_pt; // (dist has now been copied)
1849 
1850  // get pointers to the underlying data
1851  double* values_pt = matrix_pt[i]->value();
1852  int* column_index_pt = matrix_pt[i]->column_index();
1853  int* row_start_pt = matrix_pt[i]->row_start();
1854 
1855  // build the matrix without a copy of the data
1856  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1857  matrix_pt[i]->nnz(),
1858  values_pt,
1859  column_index_pt,
1860  row_start_pt);
1861  }
1862  }
1863 
1864  // otherwise the matrix is distributed, so its rows must be
1865  // redistributed over the subset of processors for this preconditioner
1866  else
1867  {
1868  // first compute the distribution of this preconditioner on its subset
1869  // of processors
1870 
1871  // number of rows for this preconditioner
1872  unsigned nrow = matrix_pt[i]->nrow();
1873 
1874  // setup First_row_for_local_prec and Nrow_local_for_local_prec
1875  target_first_row[i].resize(nproc);
1876  target_nrow_local[i].resize(nproc);
1877  unsigned nproc_local = Nproc_for_prec[i];
1878  for (unsigned p = 0; p < nproc_local; p++)
1879  {
1880  int pp = First_proc_for_prec[i] + p;
1881  target_first_row[i][pp] =
1882  unsigned(double(p * nrow) / double(nproc_local));
1883  }
1884  for (unsigned p = 0; p < nproc_local - 1; p++)
1885  {
1886  int pp = First_proc_for_prec[i] + p;
1887  target_nrow_local[i][pp] =
1888  target_first_row[i][pp + 1] - target_first_row[i][pp];
1889  }
1890  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1891  target_nrow_local[i][last_local_proc] =
1892  nrow - target_first_row[i][last_local_proc];
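      // For illustration (made-up sizes): with nrow = 10 rows spread over
      // nproc_local = 3 processors the formula above gives first rows 0, 3
      // and 6, and hence nrow_local values of 3, 3 and 4, i.e. an
      // (approximately) uniform distribution of the rows over the subset of
      // processors that will hold this preconditioner.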
1893 
1894  // get the details of the current distribution
1895  Vector<unsigned> current_first_row(nproc);
1896  Vector<unsigned> current_nrow_local(nproc);
1897  for (unsigned p = 0; p < nproc; p++)
1898  {
1899  current_first_row[p] = matrix_pt[i]->first_row(p);
1900  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1901  }
1902 
1903  // resize storage for details of the data to be sent and received
1904  First_row_for_proc[i].resize(nproc, 0);
1905  Nrow_local_for_proc[i].resize(nproc, 0);
1906  First_row_from_proc[i].resize(nproc, 0);
1907  Nrow_local_from_proc[i].resize(nproc, 0);
1908 
1909  // for every processor compute the first_row and nrow_local that will
1910  // be sent and received by this processor
1911  for (unsigned p = 0; p < nproc; p++)
1912  {
1913  // start with data to be sent
1914  if ((target_first_row[i][p] <
1915  (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
1916  (current_first_row[my_rank] <
1917  (target_first_row[i][p] + target_nrow_local[i][p])))
1918  {
1919  First_row_for_proc[i][p] =
1920  std::max(current_first_row[my_rank], target_first_row[i][p]);
1921  Nrow_local_for_proc[i][p] =
1922  std::min(
1923  (current_first_row[my_rank] + current_nrow_local[my_rank]),
1924  (target_first_row[i][p] + target_nrow_local[i][p])) -
1925  First_row_for_proc[i][p];
1926  }
1927 
1928  // and data to be received
1929  if ((target_first_row[i][my_rank] <
1930  (current_first_row[p] + current_nrow_local[p])) &&
1931  (current_first_row[p] < (target_first_row[i][my_rank] +
1932  target_nrow_local[i][my_rank])))
1933  {
1934  First_row_from_proc[i][p] =
1935  std::max(current_first_row[p], target_first_row[i][my_rank]);
1936  Nrow_local_from_proc[i][p] =
1937  std::min((current_first_row[p] + current_nrow_local[p]),
1938  (target_first_row[i][my_rank] +
1939  target_nrow_local[i][my_rank])) -
1940  First_row_from_proc[i][p];
1941  }
1942  }
1943 
1944  // resize nnz_send
1945  nnz_send[i].resize(nproc);
1946 
1947  // compute the number of nnzs to be sent
1948  // and the number of send and receive requests to be made (nreq)
1949  for (unsigned p = 0; p < nproc; p++)
1950  {
1951  if (Nrow_local_for_proc[i][p] != 0)
1952  {
1953  int* row_start = matrix_pt[i]->row_start();
1954  unsigned k =
1955  First_row_for_proc[i][p] - current_first_row[my_rank];
1956  nnz_send[i][p] =
1957  row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
1958  }
1959  }
1960 
1961  // resize nnz_recv
1962  nnz_recv[i].resize(nproc);
1963 
1964  // send nnz to be sent to each processor
1965  for (unsigned p = 0; p < nproc; p++)
1966  {
1967  // send and recv
1968 
1969  // don't mpi send to self
1970  if (p != my_rank)
1971  {
1972  // non-blocking send
1973  if (Nrow_local_for_proc[i][p] != 0)
1974  {
1975  // send to other processors
1976  int tag = this->compute_tag(nproc, my_rank, p, 0);
1977  MPI_Request tr;
1978  req.push_back(tr);
1979  MPI_Isend(&nnz_send[i][p],
1980  1,
1981  MPI_UNSIGNED,
1982  p,
1983  tag,
1984  Global_communicator_pt->mpi_comm(),
1985  &req[c]);
1986  c++;
1987  }
1988  }
1989  // receive from self
1990  else
1991  {
1992  if (Nrow_local_for_proc[i][p] != 0)
1993  {
1994  nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
1995  }
1996  }
1997  }
1998  }
1999  }
2000 
2001  for (unsigned i = 0; i < Nprec; i++)
2002  {
2003  // resize nnz_recv
2004  nnz_recv[i].resize(nproc);
2005 
2006  // receive nnz from other processors
2007  for (unsigned pp = 0; pp < nproc; pp++)
2008  {
2009  // next processor to receive from
2010  unsigned p = (nproc + my_rank - pp) % nproc;
2011 
2012  // don't mpi receive from self
2013  if (p != my_rank)
2014  {
2015  // blocking recv
2016  if (Nrow_local_from_proc[i][p] != 0)
2017  {
2018  int tag = this->compute_tag(nproc, p, my_rank, 0);
2019  MPI_Status stat;
2020  unsigned nnz_temp;
2021  MPI_Recv(&nnz_temp,
2022  1,
2023  MPI_UNSIGNED,
2024  p,
2025  tag,
2026  Global_communicator_pt->mpi_comm(),
2027  &stat);
2028  nnz_recv[i][p] = nnz_temp;
2029  }
2030  }
2031 
2032  // receive from self
2033  else
2034  {
2035  nnz_recv[i][p] = nnz_send[i][p];
2036  }
2037  }
2038  }
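      // A note on the message tags used in this function: compute_tag(...) is
      // called with the source rank first and the destination rank second
      // (plus a flag distinguishing the nnz-count messages, type 0, from the
      // matrix-data messages, type 1), so an MPI_Isend posted with
      // compute_tag(nproc, my_rank, p, type) is matched by an MPI_Irecv
      // posted with compute_tag(nproc, p, my_rank, type); both sides of a
      // matched pair therefore compute the same tag.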
2039 
2040  // get the number of nnzs to be received from each processor
2041 
2042  // total number of nnz to be received
2043  unsigned nnz_total = 0;
2044  for (unsigned p = 0; p < nproc; p++)
2045  {
2046  nnz_total += nnz_recv[Color][p];
2047  }
2048 
2049  // compute nnz block start
2050  Vector<unsigned> nnz_start_proc;
2051  Vector<unsigned> nnz_start_index;
2052  unsigned row_ptr = target_first_row[Color][my_rank];
2053  int p = 0;
2054  unsigned nnz_ptr = 0;
2055  for (p = 0; p < int(nproc); p++)
2056  {
2057  if (First_row_from_proc[Color][p] == row_ptr &&
2058  Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
2059  {
2060  nnz_start_proc.push_back(p);
2061  nnz_start_index.push_back(nnz_ptr);
2062  nnz_ptr += nnz_recv[Color][p];
2063  row_ptr += Nrow_local_from_proc[Color][p];
2064  p = -1;
2065  }
2066  }
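      // The loop above orders the contributing processors by the first row
      // they supply: when the processor whose contribution starts at row_ptr
      // is found, its rank and the start of its block within the received
      // nnz arrays are recorded, row_ptr and nnz_ptr are advanced, and the
      // scan restarts (p is reset to -1 so that the p++ of the for loop
      // takes it back to 0). For example (made-up case), if processor 2
      // supplies rows 0-4 with 20 nnzs and processor 0 supplies rows 5-9
      // with 12 nnzs, this yields nnz_start_proc = [2, 0] and
      // nnz_start_index = [0, 20].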
2067 
2068  // storage for derived datatypes
2069  Vector<MPI_Datatype> datatypes;
2070 
2071  // storage for received data
2072  double* values_recv = new double[nnz_total];
2073  int* column_index_recv = new int[nnz_total];
2074  int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
2075 
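      // Communication scheme for the matrix data (summarising the code
      // below): each non-empty contribution is exchanged as a single message
      // described by an MPI struct datatype built from three contiguous
      // blocks (the values, the column indices and the slice of row_start),
      // with displacements taken relative to the start of the values block.
      // The data therefore lands directly in values_recv, column_index_recv
      // and row_start_recv at its final position; only the row_start entries
      // need the index shift that is applied once the receives have
      // completed.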
2076  /// ////////////////////////////////////////////////////////////////////////
2077  // RECV
2078  /// ////////////////////////////////////////////////////////////////////////
2079  unsigned c_recv = 0;
2080  Vector<MPI_Request> recv_req;
2081 
2082  // receive the contributions to the local matrix
2083  for (unsigned p = 0; p < nproc; p++)
2084  {
2085  // use mpi methods to send to and receive from all but my rank
2086  if (p != my_rank)
2087  {
2088  // just receive
2089  if (nnz_recv[Color][p] != 0)
2090  {
2091  // create 3 MPI contiguous datatypes
2092  // + values
2093  // + column_index
2094  // + row_start
2095 
2096  // values
2097  MPI_Datatype datatype_values;
2098  MPI_Type_contiguous(
2099  int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
2100  MPI_Type_commit(&datatype_values);
2101  datatypes.push_back(datatype_values);
2102 
2103  // column index
2104  MPI_Datatype datatype_column_index;
2105  MPI_Type_contiguous(
2106  int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
2107  MPI_Type_commit(&datatype_column_index);
2108  datatypes.push_back(datatype_column_index);
2109 
2110  // row start
2111  MPI_Datatype datatype_row_start;
2112  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
2113  MPI_INT,
2114  &datatype_row_start);
2115  MPI_Type_commit(&datatype_row_start);
2116  datatypes.push_back(datatype_row_start);
2117 
2118  // assemble the typelist
2119  MPI_Datatype typelist[3];
2120  typelist[0] = datatype_values;
2121  typelist[1] = datatype_column_index;
2122  typelist[2] = datatype_row_start;
2123 
2124  // compute the offset for row_start
2125  int offset_n =
2126  First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
2127 
2128  // compute the offset for the values and column_index
2129  unsigned k = 0;
2130  while (nnz_start_proc[k] != p)
2131  {
2132  k++;
2133  }
2134  int offset_nnz = nnz_start_index[k];
2135 
2136  // next compute the displacements
2137  MPI_Aint displacements[3];
2138  MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
2139  MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
2140  MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
2141  for (int j = 2; j >= 0; j--)
2142  {
2143  displacements[j] -= displacements[0];
2144  }
2145 
2146  // set the block lengths
2147  int block_length[3];
2148  block_length[0] = block_length[1] = block_length[2] = 1;
2149 
2150  // now build the final datatype
2151  MPI_Datatype recv_type;
2152  MPI_Type_create_struct(
2153  3, block_length, displacements, typelist, &recv_type);
2154  MPI_Type_commit(&recv_type);
2155  datatypes.push_back(recv_type);
2156 
2157  // recv
2158  int tag = this->compute_tag(nproc, p, my_rank, 1);
2159  MPI_Request tr1;
2160  recv_req.push_back(tr1);
2161  MPI_Irecv(values_recv + offset_nnz,
2162  1,
2163  recv_type,
2164  p,
2165  tag,
2166  Global_communicator_pt->mpi_comm(),
2167  &recv_req[c_recv]);
2168  c_recv++;
2169  }
2170  }
2171  }
2172 
2173  /// ////////////////////////////////////////////////////////////////////////
2174  // SEND
2175  /// ////////////////////////////////////////////////////////////////////////
2176  unsigned c_send = 0;
2177  Vector<MPI_Request> send_req;
2178 
2179  // for every matrix send this processor's contribution of rows to the
2180  // processors that will assemble the corresponding duplicated matrix
2181  for (unsigned i = 0; i < Nprec; i++)
2182  {
2183  // get pointers to the underlying data in the current matrix
2184  double* values_send = matrix_pt[i]->value();
2185  int* row_start_send = matrix_pt[i]->row_start();
2186  int* column_index_send = matrix_pt[i]->column_index();
2187 
2188  // send the contents of the matrix
2189  for (unsigned p = 0; p < nproc; p++)
2190  {
2191  // use mpi methods to send to and receive from all but my rank
2192  if (p != my_rank)
2193  {
2194  // send
2195  if (nnz_send[i][p] != 0)
2196  {
2197  // create 3 MPI contiguous datatypes
2198  // + values
2199  // + column_index
2200  // + row_start
2201 
2202  // values
2203  MPI_Datatype datatype_values;
2204  MPI_Type_contiguous(
2205  int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
2206  MPI_Type_commit(&datatype_values);
2207  datatypes.push_back(datatype_values);
2208 
2209  // column index
2210  MPI_Datatype datatype_column_index;
2211  MPI_Type_contiguous(
2212  int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
2213  MPI_Type_commit(&datatype_column_index);
2214  datatypes.push_back(datatype_column_index);
2215 
2216  // row start
2217  MPI_Datatype datatype_row_start;
2218  MPI_Type_contiguous(
2219  int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
2220  MPI_Type_commit(&datatype_row_start);
2221  datatypes.push_back(datatype_row_start);
2222 
2223  // assemble the typelist
2224  MPI_Datatype typelist[3];
2225  typelist[0] = datatype_values;
2226  typelist[1] = datatype_column_index;
2227  typelist[2] = datatype_row_start;
2228 
2229  // compute the offset for row_start
2230  int offset_n =
2231  First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
2232 
2233  // compute the offset for the values and column_index
2234  int offset_nnz = row_start_send[offset_n];
2235 
2236  // next compute the displacements
2237  MPI_Aint displacements[3];
2238  MPI_Get_address(values_send + offset_nnz, &displacements[0]);
2239  MPI_Get_address(column_index_send + offset_nnz,
2240  &displacements[1]);
2241  MPI_Get_address(row_start_send + offset_n, &displacements[2]);
2242  for (int j = 2; j >= 0; j--)
2243  {
2244  displacements[j] -= displacements[0];
2245  }
2246 
2247  // set the block lengths
2248  int block_length[3];
2249  block_length[0] = block_length[1] = block_length[2] = 1;
2250 
2251  // now build the final datatype
2252  MPI_Datatype send_type;
2253  MPI_Type_create_struct(
2254  3, block_length, displacements, typelist, &send_type);
2255  MPI_Type_commit(&send_type);
2256  datatypes.push_back(send_type);
2257 
2258  // send
2259  int tag = this->compute_tag(nproc, my_rank, p, 1);
2260  MPI_Request tr1;
2261  send_req.push_back(tr1);
2262  MPI_Isend(values_send + offset_nnz,
2263  1,
2264  send_type,
2265  p,
2266  tag,
2267  Global_communicator_pt->mpi_comm(),
2268  &send_req[c_send]);
2269  c_send++;
2270  }
2271  }
2272  }
2273  }
2274 
2275  // otherwise send to self (copy)
2276  if (nnz_recv[Color][my_rank] != 0)
2277  {
2278  // get pointers to the underlying data in the current matrix
2279  double* values_send = matrix_pt[Color]->value();
2280  int* row_start_send = matrix_pt[Color]->row_start();
2281  int* column_index_send = matrix_pt[Color]->column_index();
2282 
2283  // offset for row_start send to self
2284  unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
2285  matrix_pt[Color]->first_row(my_rank);
2286 
2287  // offset for values and column_index send to self
2288  unsigned offset_nnz_send = row_start_send[offset_n_send];
2289 
2290  // offset for row_start receive from self
2291  unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
2292  target_first_row[Color][my_rank];
2293 
2294  // offset for values and column_index receive from self
2295  unsigned k = 0;
2296  while (nnz_start_proc[k] != my_rank)
2297  {
2298  k++;
2299  }
2300  unsigned offset_nnz_recv = nnz_start_index[k];
2301 
2302  // and send
2303 
2304  // values and column_index
2305  unsigned n_nnz = nnz_send[Color][my_rank];
2306  for (unsigned j = 0; j < n_nnz; j++)
2307  {
2308  values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
2309  column_index_recv[offset_nnz_recv + j] =
2310  column_index_send[offset_nnz_send + j];
2311  }
2312 
2313  // row start
2314  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2315  for (unsigned j = 0; j < n_n; j++)
2316  {
2317  row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
2318  }
2319  }
2320 
2321  // create the local distribution for this matrix
2322  LinearAlgebraDistribution* temp_dist_pt =
2323  new LinearAlgebraDistribution(Local_communicator_pt,
2324  target_first_row[Color][my_rank],
2325  target_nrow_local[Color][my_rank]);
2326 
2327  // create the corresponding matrix
2328  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
2329  delete temp_dist_pt; // (dist has now been copied)
2330 
2331  /// ////////////////////////////////////////////////////////////////////////
2332  // and WAIT...
2333  /// ////////////////////////////////////////////////////////////////////////
2334  if (c_recv != 0)
2335  {
2336  Vector<MPI_Status> recv_stat(c_recv);
2337  MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
2338  recv_req.clear();
2339  recv_stat.clear();
2340  }
2341 
2342  // build the matrix
2343 
2344  // update the row start
2345  unsigned nproc_contrib = nnz_start_index.size();
2346  for (unsigned j = 0; j < nproc_contrib; j++)
2347  {
2348  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
2349  target_first_row[Color][my_rank];
2350  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
2351  unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
2352  for (unsigned k = first; k < last; k++)
2353  {
2354  row_start_recv[k] += nnz_inc;
2355  }
2356  }
2357  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
2358 
2359  // build the matrix without a copy of the data
2360  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
2361  nnz_total,
2362  values_recv,
2363  column_index_recv,
2364  row_start_recv);
2365 
2366  // and finally wait for the sends
2367  if (c_send != 0)
2368  {
2369  Vector<MPI_Status> send_stat(c_send);
2370  MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
2371  send_req.clear();
2372  send_stat.clear();
2373  }
2374 
2375  // and clear the datatype
2376  unsigned ndatatypes = datatypes.size();
2377  for (unsigned i = 0; i < ndatatypes; i++)
2378  {
2379  MPI_Type_free(&datatypes[i]);
2380  }
2381  }
2382 
2383  // now setup the preconditioner
2384  Preconditioner_pt = prec_pt[Color];
2385  Preconditioner_pt->setup(local_matrix_pt);
2386 
2387  // clean up memory
2388  if (matrix_pt[0]->distributed())
2389  {
2390  delete local_matrix_pt;
2391  }
2392 
2393  // delete the preconditioners not used on this processor
2394  for (unsigned i = 0; i < Nprec; i++)
2395  {
2396  if (i != Color)
2397  {
2398  delete prec_pt[i];
2399  }
2400  }
2401  } // end of setup_preconditioners()
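      // Typical usage sketch (illustrative only; prec_array, the matrix and
      // preconditioner pointers and comm_pt are placeholder names, not part
      // of this file):
      //
      //   PreconditionerArray prec_array;
      //   Vector<CRDoubleMatrix*> matrix_pt(2);
      //   Vector<Preconditioner*> prec_pt(2);
      //   // ... fill matrix_pt[0..1] and prec_pt[0..1] ...
      //   prec_array.setup_preconditioners(matrix_pt, prec_pt, comm_pt);
      //
      //   // inside the outer solver: apply all preconditioners concurrently
      //   Vector<DoubleVector> r(2), z(2);
      //   // ... fill r[0] and r[1] with the block residuals ...
      //   prec_array.solve_preconditioners(r, z);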
2402 
2403  //============================================================================
2404  /// Applies each preconditioner to the corresponding vector in
2405  /// r and z
2406  //=============================================================================
2407  void PreconditionerArray::solve_preconditioners(const Vector<DoubleVector>& r,
2408  Vector<DoubleVector>& z)
2409  {
2410 #ifdef PARANOID
2411  // check that a preconditioner has been setup
2412  if (Preconditioner_pt == 0)
2413  {
2414  std::ostringstream error_message;
2415  error_message << "The preconditioners have not been setup.";
2416  throw OomphLibError(
2417  error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2418  }
2419 
2420  // check that r is the correct length
2421  if (r.size() != Nprec)
2422  {
2423  std::ostringstream error_message;
2424  error_message << "This PreconditionerArray has " << Nprec
2425  << " preconditioners but r only contains " << r.size()
2426  << " preconditioners.";
2427  throw OomphLibError(
2428  error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2429  }
2430 
2431  // check that z is the correct length
2432  if (z.size() != Nprec)
2433  {
2434  std::ostringstream error_message;
2435  error_message << "This PreconditionerArray has " << Nprec
2436  << " preconditioners but z only contains " << z.size()
2437  << " preconditioners.";
2438  throw OomphLibError(
2439  error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2440  }
2441  // check that the vector has the same distribution as the
2442  // preconditioner
2443  for (unsigned i = 0; i < Nprec; i++)
2444  {
2445  if (*r[i].distribution_pt() != *Distribution_pt[i])
2446  {
2447  std::ostringstream error_message;
2448  error_message << "The distribution of r[" << i << "] does not have the"
2449  << " the same distribution as the matrix_pt[" << i
2450  << "] that was passed to setup_preconditioners(...)";
2451  throw OomphLibError(error_message.str(),
2452  OOMPH_CURRENT_FUNCTION,
2453  OOMPH_EXCEPTION_LOCATION);
2454  }
2455  }
2456 #endif
2457 
2458  // the local r vector
2459  DoubleVector local_r(Preconditioner_pt->distribution_pt(), 0.0);
2460 
2461  // number of processors
2462  unsigned nproc = Global_communicator_pt->nproc();
2463 
2464  // cache my global rank
2465  unsigned my_rank = Global_communicator_pt->my_rank();
2466 
2467  // send and receive requests
2468  Vector<MPI_Request> send_reqs;
2469  Vector<MPI_Request> recv_reqs;
2470 
2471  // cache first_row
2472  unsigned first_row = Preconditioner_pt->first_row();
2473 
2474  // local residual values for this processor
2475  double* local_r_values = local_r.values_pt();
2476 
2477  // for every vector we assemble the duplicate of the vector on the
2478  // appropriate subset of processors
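      // The overall pattern (summarising the code below): (i) redistribute
      // each r[i] onto the processors that hold preconditioner i, using
      // non-blocking sends/receives plus a direct copy for data that stays
      // on this processor; (ii) apply the local preconditioner to local_r to
      // obtain local_z; (iii) return the entries of local_z to the
      // processors that own the corresponding rows of z[i], using the
      // reverse communication pattern.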
2479 
2480  // first we post the non-blocking sends and recvs
2481  for (unsigned i = 0; i < Nprec; i++)
2482  {
2483  if (r[i].distributed())
2484  {
2485  // current first_row and nrow_local
2486  unsigned current_first_row = r[i].first_row();
2487 
2488  // send and receive the contents of the vector
2489  for (unsigned p = 0; p < nproc; p++)
2490  {
2491  // use mpi methods to send to and receive from all but my rank
2492  if (p != my_rank)
2493  {
2494  // send
2495  if (Nrow_local_for_proc[i][p] != 0)
2496  {
2497  // compute the offset for the values
2498  int offset_n = First_row_for_proc[i][p] - current_first_row;
2499 
2500  // send the values
2501  int tag = this->compute_tag(nproc, my_rank, p, 0);
2502  MPI_Request tr;
2503  MPI_Isend(const_cast<double*>(r[i].values_pt()) + offset_n,
2504  int(Nrow_local_for_proc[i][p]),
2505  MPI_DOUBLE,
2506  p,
2507  tag,
2508  Global_communicator_pt->mpi_comm(),
2509  &tr);
2510  send_reqs.push_back(tr);
2511  }
2512 
2513  // recv
2514  if (Nrow_local_from_proc[i][p] != 0)
2515  {
2516  // compute the offset for the values
2517  int offset_n = First_row_from_proc[i][p] - first_row;
2518 
2519  // receive the values
2520  int tag = this->compute_tag(nproc, p, my_rank, 0);
2521  MPI_Request tr;
2522  MPI_Irecv(local_r_values + offset_n,
2523  int(Nrow_local_from_proc[i][p]),
2524  MPI_DOUBLE,
2525  p,
2526  tag,
2527  Global_communicator_pt->mpi_comm(),
2528  &tr);
2529  recv_reqs.push_back(tr);
2530  }
2531  }
2532  }
2533  }
2534  }
2535 
2536 
2537  // and now we send to self
2538  if (!r[Color].distributed())
2539  {
2540  // just copy to the new vector
2541  const double* r_pt = r[Color].values_pt();
2542  unsigned nrow_local = local_r.nrow_local();
2543  for (unsigned i = 0; i < nrow_local; i++)
2544  {
2545  local_r_values[i] = r_pt[i];
2546  }
2547  }
2548  else
2549  {
2550  // the incoming residual associated with the processor
2551  const double* r_pt = r[Color].values_pt();
2552 
2553  // current first_row and nrow_local
2554  unsigned current_first_row = r[Color].first_row();
2555 
2556  // cache first_row
2557  unsigned first_row = Preconditioner_pt->first_row();
2558 
2559  // copy the rows of r[Color] that stay on this processor
2560  if (Nrow_local_from_proc[Color][my_rank] != 0)
2561  {
2562  // offset for values send to self
2563  unsigned offset_n_send =
2564  First_row_for_proc[Color][my_rank] - current_first_row;
2565 
2566  // offset for values receive from self
2567  unsigned offset_n_recv =
2568  First_row_from_proc[Color][my_rank] - first_row;
2569 
2570  // send/receive
2571  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2572  for (unsigned j = 0; j < n_n; j++)
2573  {
2574  local_r_values[offset_n_recv + j] = r_pt[offset_n_send + j];
2575  }
2576  }
2577  }
2578 
2579  // wait for the receives to complete
2580  unsigned n_recv = recv_reqs.size();
2581  if (n_recv)
2582  {
2583  MPI_Waitall(n_recv, &recv_reqs[0], MPI_STATUSES_IGNORE);
2584  }
2585  recv_reqs.clear();
2586 
2587  // next solve
2588  // apply the local preconditioner
2589  DoubleVector local_z;
2590  Preconditioner_pt->preconditioner_solve(local_r, local_z);
2591  local_r.clear();
2592 
2593  // the local z values
2594  double* local_z_values = local_z.values_pt();
2595 
2596  // setup the vectors
2597  for (unsigned i = 0; i < Nprec; i++)
2598  {
2599  // if z[i] is not setup then set it up
2600  if (!z[i].built())
2601  {
2602  z[i].build(r[i].distribution_pt(), 0.0);
2603  }
2604  }
2605 
2606  // first we post the non-blocking sends and recvs
2607  for (unsigned i = 0; i < Nprec; i++)
2608  {
2609  if (r[i].distributed())
2610  {
2611  // current first_row and nrow_local
2612  unsigned current_first_row = r[i].first_row();
2613 
2614  // send and receive the contents of the vector
2615  for (unsigned p = 0; p < nproc; p++)
2616  {
2617  // use mpi methods to send to and receive from all but my rank
2618  if (p != my_rank)
2619  {
2620  // receive the z values for the rows previously sent to processor p
2621  if (Nrow_local_for_proc[i][p] != 0)
2622  {
2623  // compute the offset for the values
2624  int offset_n = First_row_for_proc[i][p] - current_first_row;
2625 
2626  // receive the values
2627  int tag = this->compute_tag(nproc, my_rank, p, 0);
2628  MPI_Request tr;
2629  MPI_Irecv(z[i].values_pt() + offset_n,
2630  int(Nrow_local_for_proc[i][p]),
2631  MPI_DOUBLE,
2632  p,
2633  tag,
2634  Global_communicator_pt->mpi_comm(),
2635  &tr);
2636  recv_reqs.push_back(tr);
2637  }
2638 
2639  // send the local z values back to processor p
2640  if (Nrow_local_from_proc[i][p] != 0)
2641  {
2642  // compute the offset for the values
2643  int offset_n = First_row_from_proc[i][p] - first_row;
2644 
2645  // send the values
2646  int tag = this->compute_tag(nproc, p, my_rank, 0);
2647  MPI_Request tr;
2648  MPI_Isend(local_z_values + offset_n,
2649  int(Nrow_local_from_proc[i][p]),
2650  MPI_DOUBLE,
2651  p,
2652  tag,
2653  Global_communicator_pt->mpi_comm(),
2654  &tr);
2655  send_reqs.push_back(tr);
2656  }
2657  }
2658  }
2659  }
2660  // otherwise we need to share the results
2661  else
2662  {
2663  // number of processors associated with this preconditioner
2664  unsigned nproc_local = Local_communicator_pt->nproc();
2665 
2666  // my "proc number" for this preconditioner
2667  unsigned my_local_rank = Local_communicator_pt->my_rank();
2668 
2669  // sends to self completed later
2670  if (i != Color)
2671  {
2672  // post send requests
2673  for (unsigned j = my_local_rank; j < Nproc_for_prec[i];
2674  j += nproc_local)
2675  {
2676  int p = j + First_proc_for_prec[i];
2677  MPI_Request tr;
2678  MPI_Isend(local_z_values,
2679  z[Color].nrow(),
2680  MPI_DOUBLE,
2681  p,
2682  0,
2683  Global_communicator_pt->mpi_comm(),
2684  &tr);
2685  send_reqs.push_back(tr);
2686  }
2687 
2688  // compute the processor number to recv from
2689  int p = my_local_rank;
2690  while ((p - int(Nproc_for_prec[i])) >= 0)
2691  {
2692  p -= Nproc_for_prec[i];
2693  }
2694  p += First_proc_for_prec[i];
2695 
2696  // and recv
2697  MPI_Request tr;
2698  MPI_Irecv(z[i].values_pt(),
2699  z[i].nrow(),
2700  MPI_DOUBLE,
2701  p,
2702  0,
2703  Global_communicator_pt->mpi_comm(),
2704  &tr);
2705  recv_reqs.push_back(tr);
2706  }
2707  }
2708  }
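      // Note on the non-distributed case handled above: each group of
      // processors holds the full solution for its own preconditioner in
      // local_z, so complete vectors are exchanged between groups. This
      // processor sends its local_z to a strided subset of the processors
      // handling preconditioner i (so that group Color collectively covers
      // all of group i) and receives the full z[i] from one processor of
      // group i; the i == Color copy is deferred to the code below.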
2709 
2710  // and now we send to self
2711  if (!r[Color].distributed())
2712  {
2713  // just copy to the new vector
2714  double* z_pt = z[Color].values_pt();
2715  unsigned nrow_local = local_z.nrow_local();
2716  for (unsigned i = 0; i < nrow_local; i++)
2717  {
2718  z_pt[i] = local_z_values[i];
2719  }
2720  }
2721  else
2722  {
2723  // the outgoing z values associated with this processor
2724  double* z_pt = z[Color].values_pt();
2725 
2726  // current first_row and nrow_local
2727  unsigned current_first_row = r[Color].first_row();
2728 
2729  // cache first_row
2730  unsigned first_row = Preconditioner_pt->first_row();
2731 
2732  // copy the rows of z that stay on this processor
2733  if (Nrow_local_from_proc[Color][my_rank] != 0)
2734  {
2735  // offset for values send to self
2736  unsigned offset_n_send =
2737  First_row_for_proc[Color][my_rank] - current_first_row;
2738 
2739  // offset for values receive from self
2740  unsigned offset_n_recv =
2741  First_row_from_proc[Color][my_rank] - first_row;
2742 
2743  // send/receive
2744  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2745  for (unsigned j = 0; j < n_n; j++)
2746  {
2747  z_pt[offset_n_send + j] = local_z_values[offset_n_recv + j];
2748  }
2749  }
2750  }
2751 
2752 
2753  // wait for the receives to complete
2754  n_recv = recv_reqs.size();
2755  if (n_recv)
2756  {
2757  MPI_Waitall(n_recv, &recv_reqs[0], MPI_STATUSES_IGNORE);
2758  }
2759  recv_reqs.clear();
2760 
2761  // wait for the sends to complete
2762  unsigned n_send = send_reqs.size();
2763  if (n_send)
2764  {
2765  MPI_Waitall(n_send, &send_reqs[0], MPI_STATUSES_IGNORE);
2766  }
2767  send_reqs.clear();
2768  }
2769 } // namespace oomph
2770 
2771 // End of "if we have mpi"
2772 #endif