superlu_dist.c
Go to the documentation of this file.
1 
2 
3 /*----------------------------------------------------------------
4  Interface to distributed SuperLU, created by JWB by adapting
5  code in the superlu distribution, i.e. files /SRC/pdgssvx.c
6  (function pdgssvx solves a system of linear equations) and
7  /EXAMPLE/pddrive.c demo (a driver program to illustrate how to
8  use pdgssvx).
9  Essentially the code below performs the same functions as
10  pdgssvx in a modified order which allows resolves. Much of the
11  code taken from pdgssvx remains essentially unchanged other
12  than changing the layout to match the oomph-lib standard. Comments
13  from the original code have been left unchanged whenever possible
14  to help match this code with that found in pdgssvx.c
15 
16  To update this driver code for use with later versions of
17  Distributed SuperLU I suggest first looking at changes (if any) to
18  the two distributed SuperLU files, and then making the corresponding
19  changes to the code below.
20 
21  Adapted from code found in:
22  -- Distributed SuperLU routine (version 2.0) --
23  Lawrence Berkeley National Lab, Univ. of California Berkeley.
24  March 15, 2003
25 
26  ----------------------------------------------------------------
27 */
28 #include <math.h>
29 #ifdef USING_OOMPH_SUPERLU_DIST
30 #include "oomph_superlu_dist_3.0.h"
31 #else
32 #include<superlu_defs.h>
33 #include<superlu_ddefs.h>
34 #include<Cnames.h>
35 #include<machines.h>
36 #include<psymbfact.h>
37 #include<supermatrix.h>
38 #include<old_colamd.h>
39 #include<util_dist.h>
40 #endif
41 
42 
43 /* ================================================= */
44 /* Struct for the lu factors */
45 /* ================================================= */
46 typedef struct
47 {
48  gridinfo_t* grid;
49  SuperMatrix* A;
50  SuperMatrix* AC;
51  ScalePermstruct_t* ScalePermstruct;
52  LUstruct_t* LUstruct;
53  SOLVEstruct_t* SOLVEstruct;
54  superlu_options_t* options;
55  int_t rowequ;
56  int_t colequ;
57  double anorm;
59 
60 
61 /* ================================================= */
62 /* Can't think of any other way to store the memory */
63 /* stats... (PM) */
64 /* ================================================= */
66 {
67  // Storage for the memory stats
68  mem_usage_t Memory_usage;
69 
70  // Boolean
73 
74 /* ========================================================================= */
75 /* Helper to record memory usage*/
76 /* ========================================================================= */
78 {
79  // If the LU decomposition has been stored
81  {
83  }
84  else
85  {
86  return 0.0;
87  }
88 } // End of get_lu_factor_memory_usage_in_bytes
89 
90 /* ========================================================================= */
91 /* Helper to record memory usage*/
92 /* ========================================================================= */
94 {
95  // If the LU decomposition has been stored
97  {
99  }
100  else
101  {
102  return 0.0;
103  }
104 } // End of get_total_memory_usage_in_bytes
105 
106 /* ========================================================================= */
107 /* Helper to record memory usage*/
108 /* ========================================================================= */
109 void get_memory_usage_in_bytes_dist(double* lu_factor_memory,
110  double* total_memory)
111 {
112  (*lu_factor_memory)=symbolic_memory_statistics_storage.Memory_usage.for_lu;
113  (*total_memory)=symbolic_memory_statistics_storage.Memory_usage.total;
114 }
115 
116 //=============================================================================
117 // helper method - just calls the superlu method dCompRow_to_CompCol to convert
118 // the c-style vectors of a cr matrix to a cc matrix
119 //=============================================================================
120 void superlu_cr_to_cc(int nrow, int ncol, int nnz, double* cr_values,
121  int* cr_index, int* cr_start, double** cc_values,
122  int** cc_index, int** cc_start)
123 {
124  dCompRow_to_CompCol(nrow,ncol,nnz,cr_values,cr_index,cr_start,cc_values,
125  cc_index,cc_start);
126 }
127 
128 /*----------------------------------------------------------------
129  Bridge to distributed SuperLU with distributed memory (version 2.0).
130  Requires input of system matrix in compressed row form.
131 
132  Parameters:
133  op_flag = int specifies the operation:
134  1, performs LU decomposition for the first time
135  2, performs triangular solve
136  3, free all the storage in the end
137  - n = size of system (square matrix)
138  - nnz = # of nonzero entries
139  - values = 1D C array of nonzero entries
140  - row_index = 1D C array of row indices
141  - column_start = 1D C array of column start indices
142  - b = 1D C array representing the rhs vector, is overwritten
143  by solution.
144  - nprow = # of rows in process grid
145  - npcol = # of columns in process grid
146  - doc = 0/1 for doc/no doc
147  - data = pointer to structure to contain LU solver data.
148  If *opt_flag == 1, it is an output. Otherwise, it it an input.
149 
150  Return value for *info:
151  = 0: successful exit
152  > 0: if *info = i, and i is
153  <= A->ncol: U(i,i) is exactly zero. The factorization has
154  been completed, but the factor U is exactly singular,
155  so the solution could not be computed.
156  > A->ncol: number of bytes allocated when memory allocation
157  failure occurred, plus A->ncol.
158  < 0: some other error
159  ----------------------------------------------------------------
160 */
161 void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations,
162  int n, int nnz_local,
163  int nrow_local,int first_row,
164  double *values, int *col_index,
165  int *row_start, double *b,
166  int nprow, int npcol,
167  int doc, void **data, int *info,
168  MPI_Comm comm)
169 {
170  /* Some SuperLU structures */
171  superlu_options_t *options;
172  SuperLUStat_t stat;
173  SuperMatrix *A;
174  ScalePermstruct_t *ScalePermstruct;
175  LUstruct_t *LUstruct;
176  SOLVEstruct_t *SOLVEstruct;
177  gridinfo_t *grid;
178 
179  /* Structure to hold SuperLU structures and data */
180  superlu_dist_data *superlu_data;
181 
182  int_t *perm_r; /* row permutations from partial pivoting */
183  int_t *perm_c; /* column permutation vector */
184  int_t *etree; /* elimination tree */
185  int_t *rowptr=NULL; int_t *colind; /* Local A in NR*/
186  int_t job, rowequ, colequ, iinfo, need_value, i, j, irow, icol;
187  int_t m_loc, fst_row, nnz, nnz_loc; /* dist_mem_use; */
188  int_t *colptr, *rowind;
189  NRformat_loc *Astore;
190  SuperMatrix GA; /* Global A in NC format */
191  NCformat *GAstore;
192  double *a_GA=NULL;
193  SuperMatrix GAC; /* Global A in NCP format (add n end pointers) */
194  NCPformat *GACstore;
195  Glu_persist_t *Glu_persist;
196  Glu_freeable_t *Glu_freeable=NULL;
197 
198  /* Other stuff needed by SuperLU */
199  double *berr=NULL;
200  double *a=NULL; double *X, *b_col;
201  double *B=b;
202  double *C, *R, *C1, *R1, *x_col; /* *bcol, */
203  double amax, t, colcnd, rowcnd; double anorm=0.0;
204  char equed[1], norm[1];
205  int ldx; /* LDA for matrix X (local). */
206  //static mem_usage_t symb_mem_usage;
207  fact_t Fact;
208  int_t Equil, factored, notran, permc_spec;
209 
210  /* We're only doing single rhs problems
211  note: code will need modifying to deal with
212  multiple rhs (see function pdgssvx) */
213 
214  int nrhs=1;
215 
216  /* Square matrix */
217  int m=n;
218 
219  /* Set 'Leading dimension' of rhs vector */
220  int ldb=n;
221 
222  /* Initialize the statistics variables. */
223  PStatInit(&stat);
224 
225  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
226  SET UP GRID, FACTORS, ETC
227  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
228  if (opt_flag==1)
229  {
230  /* Allocate data structure to store data between calls to this function */
231  superlu_data =
232  (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
233 
234  /* Initialize the superlu process grid. */
235  grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
236  superlu_gridinit(comm, nprow, npcol, grid);
237  superlu_data->grid = grid;
238 
239  /* Bail out if I do not belong in the grid. */
240  int iam = grid->iam;
241  if (iam >= nprow * npcol) return;
242 
243  /* Allocate memory for SuperLU_DIST structures */
244  options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
245  A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
246  ScalePermstruct =
247  (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
248  LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
249  SOLVEstruct = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t));
250 
251  /* Create SuperMatrix from compressed row representation */
252  dCreate_CompRowLoc_Matrix_dist(A,m,n,nnz_local,nrow_local,first_row,
253  values,col_index,row_start,
254  SLU_NR_loc,SLU_D,SLU_GE);
255 
256  /* Set the default options */
257  set_default_options_dist(options);
258 
259  /* Is the matrix transposed (NOTRANS or TRANS)? */
260  options->Trans = NOTRANS;
261 
262  /* Row permutations (NATURAL [= do nothing], */
263  /* LargeDiag [default], ...)? */
264  /* options->RowPerm=NATURAL; */
265  options->RowPerm=LargeDiag;
266 
267  /* Column permutations (NATURAL [= do nothing], */
268  /* MMD_AT_PLUS_A [default],...) */
269  options->ColPerm=MMD_AT_PLUS_A;
270 
271  /* Use natural ordering instead? */
272  if (allow_permutations==0)
273  {
274  options->ColPerm=NATURAL;
275  options->RowPerm=NATURAL;
276  }
277 
278  /* printf("\n\n\nSWITCHING OFF EQUILIBRATION\n\n\n"); */
279  /* options->Equil=NO; */
280 
281  /* Iterative refinement (essential as far as I can tell).*/
282  /* Can be "NO" or "DOUBLE"*/
283  options->IterRefine = SLU_DOUBLE;
284 
285  /* Print stats during solve? */
286  if (doc==0)
287  {
288  options->PrintStat = YES;
289  }
290  else
291  {
292  options->PrintStat = NO;
293  }
294 
295 
296  /* Doc output on process 0 if required: */
297  if ((!iam)&&(doc==0))
298  {
299  printf("\nPerforming SuperLU_DIST setup\n");
300  printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
301  print_options_dist(options);
302  }
303 
304  /* Initialize ScalePermstruct and LUstruct. */
305  ScalePermstructInit(m, n, ScalePermstruct);
306  LUstructInit(m, n, LUstruct);
307 
308  /* Initialization. */
309  Glu_persist = LUstruct->Glu_persist;
310  Astore = (NRformat_loc *) A->Store;
311  nnz_loc = Astore->nnz_loc;
312  m_loc = Astore->m_loc;
313  fst_row = Astore->fst_row;
314  a = Astore->nzval;
315  rowptr = Astore->rowptr;
316  colind = Astore->colind;
317 
318  job = 5;
319  Fact = options->Fact;
320  factored = (Fact == FACTORED);
321  Equil = (!factored && options->Equil == YES);
322  notran = (options->Trans == NOTRANS);
323  rowequ = colequ = FALSE;
324  if (factored || (Fact == SamePattern_SameRowPerm && Equil))
325  {
326  rowequ = (ScalePermstruct->DiagScale == ROW) ||
327  (ScalePermstruct->DiagScale == BOTH);
328  colequ = (ScalePermstruct->DiagScale == COL) ||
329  (ScalePermstruct->DiagScale == BOTH);
330  }
331  else
332  {
333  rowequ = colequ = FALSE;
334  }
335 
336  /* Test the control parameters etc. */
337  *info = 0;
338  if (Fact < 0 || Fact > FACTORED)
339  {
340  *info = -1;
341  }
342  else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
343  {
344  *info = -1;
345  }
346  else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
347  {
348  *info = -1;
349  }
350  else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
351  {
352  *info = -1;
353  }
354  else if (options->IterRefine == SLU_EXTRA)
355  {
356  *info = -1;
357  fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
358  }
359  else if (A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
360  || A->Dtype != SLU_D || A->Mtype != SLU_GE)
361  {
362  *info = -2;
363  }
364  else if (ldb < m_loc)
365  {
366  *info = -5;
367  }
368  else if (nrhs < 0)
369  {
370  *info = -6;
371  }
372  if (*info)
373  {
374  printf("Trouble in pdgstrf. Info=%i\n",*info);
375  if (*info==-1)
376  {
377  printf("Error in options.\n");
378  }
379  else if (*info==-2)
380  {
381  printf("Error in matrix.\n");
382  }
383  else if (*info==-5)
384  {
385  printf("ldb < m_loc\n");
386  }
387  else if (*info==-6)
388  {
389  printf("nrhs < 0\n");
390  }
391  return;
392  }
393 
394  /* The following arrays are replicated on all processes. */
395  perm_r = ScalePermstruct->perm_r;
396  perm_c = ScalePermstruct->perm_c;
397  etree = LUstruct->etree;
398  R = ScalePermstruct->R;
399  C = ScalePermstruct->C;
400 
401  /* Allocate storage. */
402  if (Equil)
403  {
404  /* Not factored & ask for equilibration */
405  /* Allocate storage if not done so before. */
406  switch (ScalePermstruct->DiagScale)
407  {
408  case NOEQUIL:
409  if (!(R = (double *) doubleMalloc_dist(m)))
410  ABORT("Malloc fails for R[].");
411  if (!(C = (double *) doubleMalloc_dist(n)))
412  ABORT("Malloc fails for C[].");
413  ScalePermstruct->R = R;
414  ScalePermstruct->C = C;
415  break;
416  case ROW:
417  if (!(C = (double *) doubleMalloc_dist(n)))
418  ABORT("Malloc fails for C[].");
419  ScalePermstruct->C = C;
420  break;
421  case COL:
422  if (!(R = (double *) doubleMalloc_dist(m)))
423  ABORT("Malloc fails for R[].");
424  ScalePermstruct->R = R;
425  break;
426  default:
427  printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
428  ABORT("Never get here.");
429  break;
430  }
431  }
432 
433  /* ------------------------------------------------------------
434  Diagonal scaling to equilibrate the matrix.
435  ------------------------------------------------------------*/
436  if (Equil)
437  {
438  t = SuperLU_timer_();
439 
440  if (Fact == SamePattern_SameRowPerm)
441  {
442  /* Reuse R and C. */
443  switch (ScalePermstruct->DiagScale)
444  {
445  case NOEQUIL:
446  break;
447  case ROW:
448  irow = fst_row;
449  for (j = 0; j < m_loc; ++j)
450  {
451  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
452  {
453  a[i] *= R[irow]; /* Scale rows. */
454  }
455  ++irow;
456  }
457  break;
458  case COL:
459  for (j = 0; j < m_loc; ++j)
460  {
461  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
462  {
463  icol = colind[i];
464  a[i] *= C[icol]; /* Scale columns. */
465  }
466  }
467  break;
468  case BOTH:
469  irow = fst_row;
470  for (j = 0; j < m_loc; ++j)
471  {
472  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
473  {
474  icol = colind[i];
475  a[i] *= R[irow] * C[icol]; /* Scale rows and cols. */
476  }
477  ++irow;
478  }
479  break;
480  }
481  }
482  else
483  {
484  /* Compute R & C from scratch */
485  /* Compute the row and column scalings. */
486  pdgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid);
487 
488  /* Equilibrate matrix A if it is badly-scaled. */
489  pdlaqgs(A, R, C, rowcnd, colcnd, amax, equed);
490 
491  if (lsame_(equed, "R"))
492  {
493  ScalePermstruct->DiagScale = rowequ = ROW;
494  }
495  else if (lsame_(equed, "C"))
496  {
497  ScalePermstruct->DiagScale = colequ = COL;
498  }
499  else if (lsame_(equed, "B"))
500  {
501  ScalePermstruct->DiagScale = BOTH;
502  rowequ = ROW;
503  colequ = COL;
504  }
505  else
506  ScalePermstruct->DiagScale = NOEQUIL;
507  } /* if Fact ... */
508 
509  stat.utime[EQUIL] = SuperLU_timer_() - t;
510  } /* if Equil ... */
511 
512  if (!factored)
513  {
514  /* Skip this if already factored. */
515  /*
516  Gather A from the distributed compressed row format to
517  global A in compressed column format.
518  Numerical values are gathered only when a row permutation
519  for large diagonal is sought after.
520  */
521  if (Fact != SamePattern_SameRowPerm)
522  {
523  need_value = (options->RowPerm == LargeDiag);
524  pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA);
525  GAstore = (NCformat *) GA.Store;
526  colptr = GAstore->colptr;
527  rowind = GAstore->rowind;
528  nnz = GAstore->nnz;
529  if (need_value) a_GA = GAstore->nzval;
530  else assert(GAstore->nzval == NULL);
531  }
532 
533  /* ------------------------------------------------------------
534  Find the row permutation for A.
535  ------------------------------------------------------------*/
536  if ((int) options->RowPerm != (int) NO)
537  {
538  t = SuperLU_timer_();
539  if (Fact != SamePattern_SameRowPerm)
540  {
541  if (options->RowPerm == MY_PERMR)
542  {
543  /* Use user's perm_r. */
544  /* Permute the global matrix GA for symbfact() */
545  for (i = 0; i < colptr[n]; ++i)
546  {
547  irow = rowind[i];
548  rowind[i] = perm_r[irow];
549  }
550  }
551  else
552  {
553  /* options->RowPerm == LargeDiag */
554  /* Get a new perm_r[] */
555  if (job == 5)
556  {
557  /* Allocate storage for scaling factors. */
558  if (!(R1 = doubleMalloc_dist(m)))
559  {
560  ABORT("SUPERLU_MALLOC fails for R1[]");
561  }
562  if (!(C1 = doubleMalloc_dist(n)))
563  {
564  ABORT("SUPERLU_MALLOC fails for C1[]");
565  }
566  }
567 
568  if (!iam)
569  {
570  /* Process 0 finds a row permutation */
571  dldperm(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1);
572 
573  MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
574  if (job == 5 && Equil)
575  {
576  MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
577  MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
578  }
579  }
580  else
581  {
582  MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
583  if (job == 5 && Equil)
584  {
585  MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
586  MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
587  }
588  }
589 
590  if (job == 5)
591  {
592  if (Equil)
593  {
594  for (i = 0; i < n; ++i)
595  {
596  R1[i] = exp(R1[i]);
597  C1[i] = exp(C1[i]);
598  }
599 
600  /* Scale the distributed matrix */
601  irow = fst_row;
602  for (j = 0; j < m_loc; ++j)
603  {
604  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
605  {
606  icol = colind[i];
607  a[i] *= R1[irow] * C1[icol];
608  }
609  ++irow;
610  }
611 
612  /* Multiply together the scaling factors. */
613  if (rowequ)
614  {
615  for (i = 0; i < m; ++i)
616  {
617  R[i] *= R1[i];
618  }
619  }
620  else
621  {
622  for (i = 0; i < m; ++i)
623  {
624  R[i] = R1[i];
625  }
626  }
627  if (colequ)
628  {
629  for (i = 0; i < n; ++i)
630  {
631  C[i] *= C1[i];
632  }
633  }
634  else
635  {
636  for (i = 0; i < n; ++i)
637  {
638  C[i] = C1[i];
639  }
640  }
641 
642  ScalePermstruct->DiagScale = BOTH;
643  rowequ = colequ = 1;
644 
645  } /* end Equil */
646 
647  /* Now permute global A to prepare for symbfact() */
648  for (j = 0; j < n; ++j)
649  {
650  for (i = colptr[j]; i < colptr[j+1]; ++i)
651  {
652  irow = rowind[i];
653  rowind[i] = perm_r[irow];
654  }
655  }
656  SUPERLU_FREE(R1);
657  SUPERLU_FREE(C1);
658  }
659  else
660  {
661  /* job = 2,3,4 */
662  for (j = 0; j < n; ++j)
663  {
664  for (i = colptr[j]; i < colptr[j+1]; ++i)
665  {
666  irow = rowind[i];
667  rowind[i] = perm_r[irow];
668  } /* end for i ... */
669  } /* end for j ... */
670  } /* end else job ... */
671  } /* end if options->RowPerm ... */
672 
673  t = SuperLU_timer_() - t;
674  stat.utime[ROWPERM] = t;
675  } /* end if Fact ... */
676  }
677  else
678  {
679  /* options->RowPerm == NOROWPERM */
680  for (i = 0; i <m; ++i) perm_r[i] = i;
681  }
682  } /* end if (!factored) */
683 
684  if (!factored || options->IterRefine)
685  {
686  /* Compute norm(A), which will be used to adjust small diagonal. */
687  if (notran)
688  *(unsigned char *)norm = '1';
689  else
690  *(unsigned char *)norm = 'I';
691  anorm = pdlangs(norm, A, grid);
692  }
693 
694 
695  /* ------------------------------------------------------------
696  Perform the LU factorization.
697  ------------------------------------------------------------*/
698  if (!factored)
699  {
700  t = SuperLU_timer_();
701  /*
702  Get column permutation vector perm_c[], according to permc_spec:
703  permc_spec = NATURAL: natural ordering
704  permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
705  permc_spec = MMD_ATA: minimum degree on structure of A'*A
706  permc_spec = COLAMD: approximate minimum degree column ordering
707  permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
708  */
709  permc_spec = options->ColPerm;
710  if (permc_spec != MY_PERMC && Fact == DOFACT)
711  {
712  get_perm_c_dist(iam, permc_spec, &GA, perm_c);
713  }
714 
715  stat.utime[COLPERM] = SuperLU_timer_() - t;
716 
717  /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
718  (a.k.a. column etree), depending on the choice of ColPerm.
719  Adjust perm_c[] to be consistent with a postorder of etree.
720  Permute columns of A to form A*Pc'. */
721  if (Fact != SamePattern_SameRowPerm)
722  {
723  int_t *GACcolbeg, *GACcolend, *GACrowind;
724 
725  sp_colorder(options, &GA, perm_c, etree, &GAC);
726 
727  /* Form Pc*A*Pc' to preserve the diagonal of the matrix GAC. */
728  GACstore = GAC.Store;
729  GACcolbeg = GACstore->colbeg;
730  GACcolend = GACstore->colend;
731  GACrowind = GACstore->rowind;
732  for (j = 0; j < n; ++j)
733  {
734  for (i = GACcolbeg[j]; i < GACcolend[j]; ++i)
735  {
736  irow = GACrowind[i];
737  GACrowind[i] = perm_c[irow];
738  }
739  }
740 
741  /* Perform a symbolic factorization on Pc*Pr*A*Pc' and set up the
742  nonzero data structures for L & U. */
743  t = SuperLU_timer_();
744  if (!(Glu_freeable = (Glu_freeable_t *)
745  SUPERLU_MALLOC(sizeof(Glu_freeable_t))))
746  {
747  ABORT("Malloc fails for Glu_freeable.");
748  }
749 
750  /* Every process does this. */
751  iinfo = symbfact(options, iam, &GAC, perm_c, etree,
752  Glu_persist, Glu_freeable);
753 
754  stat.utime[SYMBFAC] = SuperLU_timer_() - t;
755  if (iinfo < 0)
756  {
757  /* Successful return */
758  QuerySpace_dist(n, -iinfo, Glu_freeable,
760  }
761  else
762  {
763  if (!iam)
764  {
765  fprintf(stderr, "symbfact() error returns %d\n", iinfo);
766  exit(-1);
767  }
768  }
769  } /* end if Fact ... */
770 
771  /* Apply column permutation to the original distributed A */
772  for (j = 0; j < nnz_loc; ++j)
773  {
774  colind[j] = perm_c[colind[j]];
775  }
776 
777  /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage.
778  NOTE: the row permutation Pc*Pr is applied internally in the
779  distribution routine. */
780  t = SuperLU_timer_();
781  /* dist_mem_use = */
782  pddistribute(Fact, n, A, ScalePermstruct,
783  Glu_freeable, LUstruct, grid);
784  stat.utime[DIST] = SuperLU_timer_() - t;
785 
786  /* Deallocate storage used in symbolic factorization. */
787  if (Fact != SamePattern_SameRowPerm)
788  {
789  iinfo = symbfact_SubFree(Glu_freeable);
790  SUPERLU_FREE(Glu_freeable);
791  }
792 
793  /* Perform numerical factorization in parallel. */
794  t = SuperLU_timer_();
795  pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
796  stat.utime[FACT] = SuperLU_timer_() - t;
797 
798  /* Destroy GA and GAC */
799  if (Fact != SamePattern_SameRowPerm)
800  {
801  Destroy_CompCol_Matrix_dist(&GA);
802  Destroy_CompCol_Permuted_dist(&GAC);
803  }
804  } /* end if (!factored) */
805 
806  if (*info!=0)
807  {
808  printf("Trouble in pdgstrf. Info=%i\n",*info);
809  if (*info>0)
810  {
811  printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
812  printf("been completed, but the factor U is exactly singular,\n");
813  printf("and division by zero will occur if it is used to solve a\n");
814  printf("system of equations.\n");
815  }
816  else
817  {
818  printf("The %i-th argument had an illegal value.\n", *info);
819  }
820  }
821 
822  /* ------------------------------------------------------------
823  Initialize the solver.
824  ------------------------------------------------------------*/
825  if (options->SolveInitialized == NO)
826  {
827  dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,
828  SOLVEstruct);
829  }
830 
831  if (options->IterRefine)
832  {
833  if (options->RefineInitialized == NO || Fact == DOFACT)
834  {
835  /* All these cases need to re-initialize gsmv structure */
836  if (options->RefineInitialized)
837  {
838  pdgsmv_finalize(SOLVEstruct->gsmv_comm);
839  }
840  pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,
841  SOLVEstruct->gsmv_comm);
842 
843  options->RefineInitialized = YES;
844  }
845  }
846 
847  /* Print the statistics. */
848  if ((doc==0) && (!iam))
849  {
850  printf("\nstats after setup....\n");
851  PStatPrint(options, &stat, grid);
852  }
853 
854  /* ------------------------------------------------------------
855  Set up data structure.
856  ------------------------------------------------------------*/
857  superlu_data->A = A;
858  superlu_data->options = options;
859  superlu_data->ScalePermstruct = ScalePermstruct;
860  superlu_data->LUstruct = LUstruct;
861  superlu_data->SOLVEstruct = SOLVEstruct;
862  superlu_data->colequ = colequ;
863  superlu_data->rowequ = rowequ;
864  superlu_data->anorm = anorm;
865  *data = superlu_data;
866 
867  } /* End of setup */
868 
869 
870  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
871  PERFORM A SOLVE
872  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
873  if (opt_flag==2)
874  {
875  /* Get pointer to the grid */
876  superlu_data = (superlu_dist_data *)*data;
877  grid = superlu_data->grid;
878 
879  /* Bail out if I do not belong in the grid. */
880  int iam = grid->iam;
881  if (iam >= nprow * npcol)
882  {
883  return;
884  }
885 
886  if ((doc==0)&&(!iam))
887  {
888  printf("\nPerforming SuperLU_DIST solve\n");
889  }
890 
891  /* ------------------------------------------------------------
892  Set other pointers to data structure.
893  ------------------------------------------------------------*/
894  A = superlu_data->A;
895  options = superlu_data->options;
896  ScalePermstruct = superlu_data->ScalePermstruct;
897  LUstruct = superlu_data->LUstruct;
898  SOLVEstruct = superlu_data->SOLVEstruct;
899  colequ = superlu_data->colequ;
900  rowequ = superlu_data->rowequ;
901  anorm = superlu_data->anorm;
902 
903  /* Initialization. */
904  Astore = (NRformat_loc *) A->Store;
905  nnz_loc = Astore->nnz_loc;
906  m_loc = Astore->m_loc;
907  fst_row = Astore->fst_row;
908  colind = Astore->colind;
909  R = ScalePermstruct->R;
910  C = ScalePermstruct->C;
911 
912  /* Local control paramaters */
913  Fact = options->Fact;
914  factored = (Fact == FACTORED);
915  Equil = (!factored && options->Equil == YES);
916  notran = (options->Trans == NOTRANS);
917 
918  /* ------------------------------------------------------------
919  Scale the right-hand side if equilibration was performed.
920  ------------------------------------------------------------*/
921  if (notran)
922  {
923  if (rowequ)
924  {
925  b_col = B;
926  for (j = 0; j < nrhs; ++j)
927  {
928  irow = fst_row;
929  for (i = 0; i < m_loc; ++i)
930  {
931  b_col[i] *= R[irow];
932  ++irow;
933  }
934  b_col += ldb;
935  }
936  }
937  }
938  else if (colequ)
939  {
940  b_col = B;
941  for (j = 0; j < nrhs; ++j)
942  {
943  irow = fst_row;
944  for (i = 0; i < m_loc; ++i)
945  {
946  b_col[i] *= C[irow];
947  ++irow;
948  }
949  b_col += ldb;
950  }
951  }
952 
953  /* Save a copy of the right-hand side. */
954  ldx = ldb;
955  if (!(X = doubleMalloc_dist(((size_t)ldx) * nrhs)))
956  {
957  ABORT("Malloc fails for X[]");
958  }
959  x_col = X;
960  b_col = B;
961  for (j = 0; j < nrhs; ++j)
962  {
963  for (i = 0; i < m_loc; ++i)
964  {
965  x_col[i] = b_col[i];
966  }
967  x_col += ldx;
968  b_col += ldb;
969  }
970 
971 
972  /* ------------------------------------------------------------
973  Solve the linear system.
974  ------------------------------------------------------------*/
975  pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc,
976  fst_row, ldb, nrhs, SOLVEstruct, &stat, info);
977 
978  if (*info!=0)
979  {
980  printf("Trouble in pdgstrs. Info=%i\n",*info);
981  printf("The %i-th argument had an illegal value.\n", *info);
982  }
983 
984  /* ------------------------------------------------------------
985  Use iterative refinement to improve the computed solution and
986  compute error bounds and backward error estimates for it.
987  ------------------------------------------------------------*/
988  if (options->IterRefine)
989  {
990  /* Improve the solution by iterative refinement. */
991  int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;
992  /*SOLVEstruct_t *SOLVEstruct1;*/ /* Used by refinement. */
993 
994  t = SuperLU_timer_();
995  if (options->RefineInitialized == NO || Fact == DOFACT)
996  {
997  /* Save a copy of the transformed local col indices
998  in colind_gsmv[]. */
999  if (colind_gsmv)
1000  {
1001  SUPERLU_FREE(colind_gsmv);
1002  }
1003  if (!(it = intMalloc_dist(nnz_loc)))
1004  {
1005  ABORT("Malloc fails for colind_gsmv[]");
1006  }
1007  colind_gsmv = SOLVEstruct->A_colind_gsmv = it;
1008  for (i = 0; i < nnz_loc; ++i)
1009  {
1010  colind_gsmv[i] = colind[i];
1011  }
1012  }
1013  else if (Fact == SamePattern || Fact == SamePattern_SameRowPerm)
1014  {
1015  double at;
1016  int_t k, jcol, p;
1017  /* Swap to beginning the part of A corresponding to the
1018  local part of X, as was done in pdgsmv_init() */
1019  for (i = 0; i < m_loc; ++i)
1020  {
1021  /* Loop through each row */
1022  k = rowptr[i];
1023  for (j = rowptr[i]; j < rowptr[i+1]; ++j)
1024  {
1025  jcol = colind[j];
1026  p = SOLVEstruct->row_to_proc[jcol];
1027  if (p == iam)
1028  {
1029  /* Local */
1030  at = a[k]; a[k] = a[j]; a[j] = at;
1031  ++k;
1032  }
1033  }
1034  }
1035 
1036  /* Re-use the local col indices of A obtained from the
1037  previous call to pdgsmv_init() */
1038  for (i = 0; i < nnz_loc; ++i)
1039  {
1040  colind[i] = colind_gsmv[i];
1041  }
1042  }
1043 
1044  /* Storage for backward error */
1045  if (!(berr = doubleMalloc_dist(nrhs)))
1046  {
1047  ABORT("Malloc fails for berr[].");
1048  }
1049 
1050  pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,
1051  B, ldb, X, ldx, nrhs, SOLVEstruct, berr, &stat, info);
1052 
1053  stat.utime[REFINE] = SuperLU_timer_() - t;
1054  }
1055 
1056  if (*info!=0)
1057  {
1058  printf("Trouble in pdgsrfs. Info=%i\n",*info);
1059  printf("The %i-th argument had an illegal value.\n", *info);
1060  }
1061 
1062  /* Print the statistics. */
1063  if ((doc==0) && (!iam))
1064  {
1065  printf("\nstats after solve....\n");
1066  PStatPrint(options, &stat, grid);
1067  }
1068 
1069  /* Permute the solution matrix B <= Pc'*X. */
1070  pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,
1071  SOLVEstruct->inv_perm_c,
1072  X, ldx, B, ldb, nrhs, grid);
1073 
1074  /* Transform the solution matrix X to a solution of the original
1075  system before the equilibration. */
1076  if (notran)
1077  {
1078  if (colequ)
1079  {
1080  b_col = B;
1081  for (j = 0; j < nrhs; ++j)
1082  {
1083  irow = fst_row;
1084  for (i = 0; i < m_loc; ++i)
1085  {
1086  b_col[i] *= C[irow];
1087  ++irow;
1088  }
1089  b_col += ldb;
1090  }
1091  }
1092  }
1093  else if (rowequ)
1094  {
1095  b_col = B;
1096  for (j = 0; j < nrhs; ++j)
1097  {
1098  irow = fst_row;
1099  for (i = 0; i < m_loc; ++i)
1100  {
1101  b_col[i] *= R[irow];
1102  ++irow;
1103  }
1104  b_col += ldb;
1105  }
1106  }
1107 
1108  /* Clean up memory */
1109  if (options->IterRefine)
1110  {
1111  SUPERLU_FREE(berr);
1112  }
1113  SUPERLU_FREE(X);
1114 
1115  } /* End of solve */
1116 
1117  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1118  PERFORM CLEAN UP OF MEMORY
1119  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1120  if (opt_flag==3)
1121  {
1122  /* Get pointer to the process grid */
1123  superlu_data = (superlu_dist_data *)*data;
1124  grid = superlu_data->grid;
1125 
1126  /* Bail out if I do not belong in the grid. */
1127  int iam = grid->iam;
1128  if (iam >= nprow * npcol) goto out;
1129  if ((doc==0)&&(!iam))
1130  {
1131  printf("\nCleaning up memory allocated for SuperLU_DIST\n");
1132  }
1133 
1134  /* ------------------------------------------------------------
1135  Set pointers to the data structure.
1136  ------------------------------------------------------------*/
1137  A = superlu_data->A;
1138  options = superlu_data->options;
1139  ScalePermstruct = superlu_data->ScalePermstruct;
1140  LUstruct = superlu_data->LUstruct;
1141  SOLVEstruct = superlu_data->SOLVEstruct;
1142 
1143  /* -------------------------------
1144  Set the other pointers required
1145  -------------------------------*/
1146  R = ScalePermstruct->R;
1147  C = ScalePermstruct->C;
1148 
1149  /* Local control paramaters */
1150  Fact = options->Fact;
1151  factored = (Fact == FACTORED);
1152  Equil = (!factored && options->Equil == YES);
1153  notran = (options->Trans == NOTRANS);
1154 
1155  /* Deallocate R and/or C if it was not used. */
1156  if (Equil && Fact != SamePattern_SameRowPerm)
1157  {
1158  switch (ScalePermstruct->DiagScale)
1159  {
1160  case NOEQUIL:
1161  SUPERLU_FREE(R);
1162  SUPERLU_FREE(C);
1163  break;
1164  case ROW:
1165  SUPERLU_FREE(C);
1166  break;
1167  case COL:
1168  SUPERLU_FREE(R);
1169  break;
1170  default:
1171  /* Apparently this one is ok */
1172  /* printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL); */
1173  /* ABORT("Never get here. THIS IS THE ONE");*/
1174  break;
1175  }
1176  }
1177 
1178  /* Free storage */
1179  ScalePermstructFree(ScalePermstruct);
1180  Destroy_LU(n, grid, LUstruct);
1181  LUstructFree(LUstruct);
1182  dSolveFinalize(options, SOLVEstruct);
1183  //Destroy_CompRowLoc_Matrix_dist(&A);
1184 
1185  // Only destroy the store part of the matrix
1186  Destroy_SuperMatrix_Store_dist(A);
1187 
1188  /* Deallocate memory */
1189  SUPERLU_FREE(A);
1190  SUPERLU_FREE(ScalePermstruct);
1191  SUPERLU_FREE(LUstruct);
1192  SUPERLU_FREE(SOLVEstruct);
1193  SUPERLU_FREE(options);
1194 
1195  /* Release the superlu process grid. */
1196 out:
1197  superlu_gridexit(grid);
1198 
1199  SUPERLU_FREE(grid);
1200  SUPERLU_FREE(superlu_data);
1201 
1202  }
1203 
1204  /* Free storage */
1205  PStatFree(&stat);
1206 
1207  return;
1208 
1209 }
1210 
1211 
1212 
1213 /*----------------------------------------------------------------
1214  Bridge to distributed SuperLU with distributed memory (version 2.0).
1215  Requires input of system matrix in compressed row form.
1216 
1217  Parameters:
1218  op_flag = int specifies the operation:
1219  1, performs LU decomposition for the first time
1220  2, performs triangular solve
1221  3, free all the storage in the end
1222  - n = size of system (square matrix)
1223  - nnz = # of nonzero entries
1224  - values = 1D C array of nonzero entries
1225  - row_index = 1D C array of row indices
1226  - column_start = 1D C array of column start indices
1227  - b = 1D C array representing the rhs vector, is overwritten
1228  by solution.
1229  - nprow = # of rows in process grid
1230  - npcol = # of columns in process grid
1231  - doc = 0/1 for doc/no doc
1232  - data = pointer to structure to contain LU solver data.
1233  If *opt_flag == 1, it is an output. Otherwise, it it an input.
1234 
1235  Return value of *info:
1236  = 0: successful exit
1237  > 0: if *info = i, and i is
1238  <= A->ncol: U(i,i) is exactly zero. The factorization has
1239  been completed, but the factor U is exactly singular,
1240  so the solution could not be computed.
1241  > A->ncol: number of bytes allocated when memory allocation
1242  failure occurred, plus A->ncol.
1243  < 0: some other error
1244  ----------------------------------------------------------------
1245 */
1246 void superlu_dist_global_matrix(int opt_flag, int allow_permutations,
1247  int n_in, int nnz_in,
1248  double *values,
1249  int *row_index, int *col_start,
1250  double *b, int nprow, int npcol,
1251  int doc, void **data, int *info,
1252  MPI_Comm comm)
1253 {
1254  /* Some SuperLU structures */
1255  superlu_options_t *options;
1256  SuperLUStat_t stat;
1257  SuperMatrix *A;
1258  SuperMatrix *AC;
1259  ScalePermstruct_t *ScalePermstruct;
1260  LUstruct_t *LUstruct;
1261  gridinfo_t *grid;
1262 
1263  /* Structure to hold SuperLU structures and data */
1264  superlu_dist_data *superlu_data;
1265 
1266  int_t *perm_r; /* row permutations from partial pivoting */
1267  int_t *perm_c; /* column permutation vector */
1268  int_t *etree; /* elimination tree */
1269  int_t job, rowequ, colequ, iinfo, i, j, irow; /* , need_value */
1270  int_t m, n, nnz;
1271  int_t *colptr, *rowind;
1272  int_t Equil, factored, notran, permc_spec; /*, dist_mem_use; */
1273  NCformat *Astore;
1274  NCPformat *ACstore;
1275  Glu_persist_t *Glu_persist;
1276  Glu_freeable_t *Glu_freeable=NULL;
1277 
1278  /* Other stuff needed by SuperLU */
1279  double *berr=NULL;
1280  double *a, *X, *b_col;
1281  double *B=b;
1282  double *C, *R, *C1, *R1, *b_work, *x_col; /* *bcol, */
1283  double amax, t, colcnd, rowcnd; double anorm=0.0;
1284  char equed[1], norm[1];
1285  int ldx; /* LDA for matrix X (local). */
1286  int iam;
1287  //static mem_usage_t num_mem_usage, symb_mem_usage;
1288  fact_t Fact;
1289 
1290 
1291  /* We're only doing single rhs problems
1292  note: code will need modifying to deal with
1293  multiple rhs (see function pdgssvx) */
1294  int nrhs=1;
1295 
1296  /* Square matrix */
1297  n=n_in;
1298  m=n_in;
1299  nnz=nnz_in;
1300 
1301  /* Set 'Leading dimension' of rhs vector */
1302  int ldb=n;
1303 
1304 
1305  /* Initialize the statistics variables. */
1306  PStatInit(&stat);
1307 
1308  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1309  SET UP GRID, FACTORS, ETC
1310  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1311  if (opt_flag==1)
1312  {
1313  /* Allocate data structure to store data between calls to this function */
1314  superlu_data =
1315  (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
1316 
1317  /* Initialize the superlu process grid. */
1318  grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
1319  superlu_gridinit(comm, nprow, npcol, grid);
1320  superlu_data->grid = grid;
1321 
1322  /* Bail out if I do not belong in the grid. */
1323  iam = grid->iam;
1324  if (iam >= nprow * npcol)
1325  {
1326  return;
1327  }
1328 
1329  /* Allocate memory for SuperLU_DIST structures */
1330  options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
1331  A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1332  AC = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1333  ScalePermstruct =
1334  (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
1335  LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
1336 
1337  /* Set the default options */
1338  set_default_options_dist(options);
1339 
1340  /* Is the matrix transposed (NOTRANS or TRANS)? */
1341  options->Trans = NOTRANS;
1342 
1343  /* Row permutations (NATURAL [= do nothing], */
1344  /* LargeDiag [default], ...)? */
1345  /* options->RowPerm=NATURAL; */
1346  options->RowPerm=LargeDiag;
1347 
1348  /* Column permutations (NATURAL [= do nothing], */
1349  /* MMD_AT_PLUS_A [default],...) */
1350  options->ColPerm=MMD_AT_PLUS_A;
1351 
1352  /* Use natural ordering instead? */
1353  if (allow_permutations==0)
1354  {
1355  options->ColPerm=NATURAL;
1356  options->RowPerm=NATURAL;
1357  }
1358 
1359  /* Iterative refinement (essential as far as I can tell).*/
1360  /* Can be "NO" or "DOUBLE"*/
1361  options->IterRefine = SLU_DOUBLE;
1362 
1363  /* Print stats during solve? */
1364  if (doc==0)
1365  {
1366  options->PrintStat = YES;
1367  }
1368  else
1369  {
1370  options->PrintStat = NO;
1371  }
1372 
1373  /* Doc output on process 0 if required: */
1374  if ((!iam)&&(doc==0))
1375  {
1376  printf("\nPerforming SuperLU_DIST setup\n");
1377  printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
1378  print_options_dist(options);
1379  }
1380 
1381 
1382  /* Create SuperMatrix from compressed column representation */
1383  dCreate_CompCol_Matrix_dist(A,m,n,nnz,values,row_index,col_start,
1384  SLU_NC,SLU_D,SLU_GE);
1385 
1386  /* Initialize ScalePermstruct and LUstruct. */
1387  ScalePermstructInit(m, n, ScalePermstruct);
1388  LUstructInit(m, n, LUstruct);
1389 
1390  /* Test the control parameters etc. */
1391  *info = 0;
1392  Fact = options->Fact;
1393  if (Fact < 0 || Fact > FACTORED)
1394  {
1395  *info = -1;
1396  }
1397  else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
1398  {
1399  *info = -1;
1400  }
1401  else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
1402  {
1403  *info = -1;
1404  }
1405  else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
1406  {
1407  *info = -1;
1408  }
1409  else if (options->IterRefine == SLU_EXTRA)
1410  {
1411  *info = -1;
1412  fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
1413  }
1414  else if (A->nrow != A->ncol || A->nrow < 0 ||
1415  A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE)
1416  {
1417  *info = -2;
1418  }
1419  else if (ldb < A->nrow)
1420  {
1421  *info = -5;
1422  }
1423  else if (nrhs < 0)
1424  {
1425  *info = -6;
1426  }
1427  if (*info)
1428  {
1429  printf("Trouble in pdgstrf. Info=%i\n",-*info);
1430  if (*info==-1)
1431  {
1432  printf("Error in options.\n");
1433  }
1434  else if (*info==-2)
1435  {
1436  printf("Error in matrix.\n");
1437  }
1438  else if (*info==-5)
1439  {
1440  printf("ldb < A->nrow\n");
1441  }
1442  else if (*info==-6)
1443  {
1444  printf("nrhs < 0\n");
1445  }
1446  return;
1447  }
1448 
1449  /* Initialization. */
1450  factored = (Fact == FACTORED);
1451  Equil = (!factored && options->Equil == YES);
1452  notran = (options->Trans == NOTRANS);
1453  job = 5;
1454  Astore = A->Store;
1455  nnz = Astore->nnz;
1456  a = Astore->nzval;
1457  colptr = Astore->colptr;
1458  rowind = Astore->rowind;
1459  if (factored || (Fact == SamePattern_SameRowPerm && Equil))
1460  {
1461  rowequ = (ScalePermstruct->DiagScale == ROW) ||
1462  (ScalePermstruct->DiagScale == BOTH);
1463  colequ = (ScalePermstruct->DiagScale == COL) ||
1464  (ScalePermstruct->DiagScale == BOTH);
1465  }
1466  else
1467  {
1468  rowequ = colequ = FALSE;
1469  }
1470 
1471  perm_r = ScalePermstruct->perm_r;
1472  perm_c = ScalePermstruct->perm_c;
1473  etree = LUstruct->etree;
1474  R = ScalePermstruct->R;
1475  C = ScalePermstruct->C;
1476  Glu_persist = LUstruct->Glu_persist;
1477  if (Equil)
1478  {
1479  /* Allocate storage if not done so before. */
1480  switch (ScalePermstruct->DiagScale)
1481  {
1482  case NOEQUIL:
1483  if (!(R = (double *) doubleMalloc_dist(m)))
1484  ABORT("Malloc fails for R[].");
1485  if (!(C = (double *) doubleMalloc_dist(n)))
1486  ABORT("Malloc fails for C[].");
1487  ScalePermstruct->R = R;
1488  ScalePermstruct->C = C;
1489  break;
1490  case ROW:
1491  if (!(C = (double *) doubleMalloc_dist(n)))
1492  ABORT("Malloc fails for C[].");
1493  ScalePermstruct->C = C;
1494  break;
1495  case COL:
1496  if (!(R = (double *) doubleMalloc_dist(m)))
1497  ABORT("Malloc fails for R[].");
1498  ScalePermstruct->R = R;
1499  break;
1500  default:
1501  printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
1502  ABORT("Never get here.");
1503  break;
1504  }
1505  }
1506 
1507  /* ------------------------------------------------------------
1508  Diagonal scaling to equilibrate the matrix.
1509  ------------------------------------------------------------*/
1510  if (Equil)
1511  {
1512  t = SuperLU_timer_();
1513 
1514  if (Fact == SamePattern_SameRowPerm)
1515  {
1516  /* Reuse R and C. */
1517  switch (ScalePermstruct->DiagScale)
1518  {
1519  case NOEQUIL:
1520  break;
1521  case ROW:
1522  for (j = 0; j < n; ++j)
1523  {
1524  for (i = colptr[j]; i < colptr[j+1]; ++i)
1525  {
1526  irow = rowind[i];
1527  a[i] *= R[irow]; /* Scale rows. */
1528  }
1529  }
1530  break;
1531  case COL:
1532  for (j = 0; j < n; ++j)
1533  {
1534  for (i = colptr[j]; i < colptr[j+1]; ++i)
1535  {
1536  a[i] *= C[j]; /* Scale columns. */
1537  }
1538  }
1539  break;
1540  case BOTH:
1541  for (j = 0; j < n; ++j)
1542  {
1543  for (i = colptr[j]; i < colptr[j+1]; ++i)
1544  {
1545  irow = rowind[i];
1546  a[i] *= R[irow] * C[j]; /* Scale rows and columns. */
1547  }
1548  }
1549  break;
1550  }
1551  }
1552  else
1553  {
1554  if (!iam)
1555  {
1556  /* Compute row and column scalings to equilibrate matrix A. */
1557  dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo);
1558 
1559  MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1560  if (iinfo == 0)
1561  {
1562  MPI_Bcast(R, m, MPI_DOUBLE, 0, grid->comm);
1563  MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1564  MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1565  MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1566  MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1567  }
1568  else
1569  {
1570  if (iinfo > 0)
1571  {
1572  if (iinfo <= m)
1573  {
1574  fprintf(stderr, "The %d-th row of A is exactly zero\n",
1575  iinfo);
1576  }
1577  else
1578  {
1579  fprintf(stderr, "The %d-th column of A is exactly zero\n",
1580  iinfo-n);
1581  }
1582  exit(-1);
1583  }
1584  }
1585  }
1586  else
1587  {
1588  MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1589  if (iinfo == 0)
1590  {
1591  MPI_Bcast(R, m, MPI_DOUBLE, 0, grid->comm);
1592  MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1593  MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1594  MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1595  MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1596  }
1597  else
1598  {
1599  ABORT("DGSEQU failed\n");
1600  }
1601  }
1602 
1603  /* Equilibrate matrix A. */
1604  dlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed);
1605  if (lsame_(equed, "R"))
1606  {
1607  ScalePermstruct->DiagScale = rowequ = ROW;
1608  }
1609  else if (lsame_(equed, "C"))
1610  {
1611  ScalePermstruct->DiagScale = colequ = COL;
1612  }
1613  else if (lsame_(equed, "B"))
1614  {
1615  ScalePermstruct->DiagScale = BOTH;
1616  rowequ = ROW;
1617  colequ = COL;
1618  }
1619  else
1620  {
1621  ScalePermstruct->DiagScale = NOEQUIL;
1622  }
1623  } /* if Fact ... */
1624 
1625  stat.utime[EQUIL] = SuperLU_timer_() - t;
1626  } /* if Equil ... */
1627 
1628 
1629  /* ------------------------------------------------------------
1630  Permute rows of A.
1631  ------------------------------------------------------------*/
1632  if ((int) options->RowPerm != (int) NO)
1633  {
1634  t = SuperLU_timer_();
1635 
1636  if (Fact == SamePattern_SameRowPerm /* Reuse perm_r. */
1637  || options->RowPerm == MY_PERMR)
1638  {
1639  /* Use my perm_r. */
1640  /* for (j = 0; j < n; ++j) {
1641  for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1642  for (i = 0; i < colptr[n]; ++i)
1643  {
1644  irow = rowind[i];
1645  rowind[i] = perm_r[irow];
1646  /* }*/
1647  }
1648  }
1649  else if (!factored)
1650  {
1651  if (job == 5)
1652  {
1653  /* Allocate storage for scaling factors. */
1654  if (!(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))))
1655  ABORT("SUPERLU_MALLOC fails for R1[]");
1656  if (!(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))))
1657  ABORT("SUPERLU_MALLOC fails for C1[]");
1658  }
1659 
1660  if (!iam)
1661  {
1662  /* Process 0 finds a row permutation for large diagonal. */
1663  dldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1);
1664 
1665  MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1666  if (job == 5 && Equil)
1667  {
1668  MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1669  MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1670  }
1671  }
1672  else
1673  {
1674  MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1675  if (job == 5 && Equil)
1676  {
1677  MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1678  MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1679  }
1680  }
1681 
1682  if (job == 5)
1683  {
1684  if (Equil)
1685  {
1686  for (i = 0; i < n; ++i)
1687  {
1688  R1[i] = exp(R1[i]);
1689  C1[i] = exp(C1[i]);
1690  }
1691  for (j = 0; j < n; ++j)
1692  {
1693  for (i = colptr[j]; i < colptr[j+1]; ++i)
1694  {
1695  irow = rowind[i];
1696  a[i] *= R1[irow] * C1[j]; /* Scale the matrix. */
1697  rowind[i] = perm_r[irow];
1698  }
1699  }
1700 
1701  /* Multiply together the scaling factors. */
1702  if (rowequ)
1703  {
1704  for (i = 0; i < m; ++i)
1705  {
1706  R[i] *= R1[i];
1707  }
1708  }
1709  else
1710  {
1711  for (i = 0; i < m; ++i)
1712  {
1713  R[i] = R1[i];
1714  }
1715  }
1716  if (colequ)
1717  {
1718  for (i = 0; i < n; ++i)
1719  {
1720  C[i] *= C1[i];
1721  }
1722  }
1723  else
1724  {
1725  for (i = 0; i < n; ++i)
1726  {
1727  C[i] = C1[i];
1728  }
1729  }
1730 
1731  ScalePermstruct->DiagScale = BOTH;
1732  rowequ = colequ = 1;
1733  }
1734  else
1735  {
1736  /* No equilibration. */
1737  /* for (j = 0; j < n; ++j) {
1738  for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1739  for (i = colptr[0]; i < colptr[n]; ++i)
1740  {
1741  irow = rowind[i];
1742  rowind[i] = perm_r[irow];
1743  }
1744  /* }*/
1745  }
1746  SUPERLU_FREE(R1);
1747  SUPERLU_FREE(C1);
1748  }
1749  else
1750  {
1751  /* job = 2,3,4 */
1752  for (j = 0; j < n; ++j)
1753  {
1754  for (i = colptr[j]; i < colptr[j+1]; ++i)
1755  {
1756  irow = rowind[i];
1757  rowind[i] = perm_r[irow];
1758  }
1759  }
1760  }
1761  } /* else !factored */
1762 
1763  t = SuperLU_timer_() - t;
1764  stat.utime[ROWPERM] = t;
1765  } /* if options->RowPerm ... */
1766 
1767  if (!factored || options->IterRefine)
1768  {
1769  /* Compute norm(A), which will be used to adjust small diagonal. */
1770  if (notran)
1771  {
1772  *(unsigned char *)norm = '1';
1773  }
1774  else
1775  {
1776  *(unsigned char *)norm = 'I';
1777  }
1778  anorm = dlangs_dist(norm, A);
1779  }
1780 
1781 
1782 
1783  /* ------------------------------------------------------------
1784  Perform the LU factorization.
1785  ------------------------------------------------------------*/
1786  if (!factored)
1787  {
1788  t = SuperLU_timer_();
1789  /*
1790  Get column permutation vector perm_c[], according to permc_spec:
1791  permc_spec = NATURAL: natural ordering
1792  permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
1793  permc_spec = MMD_ATA: minimum degree on structure of A'*A
1794  permc_spec = COLAMD: approximate minimum degree column ordering
1795  permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
1796  */
1797  permc_spec = options->ColPerm;
1798  if (permc_spec != MY_PERMC && Fact == DOFACT)
1799  {
1800  /* Use an ordering provided by SuperLU */
1801  get_perm_c_dist(iam, permc_spec, A, perm_c);
1802  }
1803 
1804  /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
1805  (a.k.a. column etree), depending on the choice of ColPerm.
1806  Adjust perm_c[] to be consistent with a postorder of etree.
1807  Permute columns of A to form A*Pc'. */
1808  sp_colorder(options, A, perm_c, etree, AC);
1809 
1810  /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */
1811  ACstore = AC->Store;
1812  for (j = 0; j < n; ++j)
1813  {
1814  for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i)
1815  {
1816  irow = ACstore->rowind[i];
1817  ACstore->rowind[i] = perm_c[irow];
1818  }
1819  }
1820  stat.utime[COLPERM] = SuperLU_timer_() - t;
1821 
1822  /* Perform a symbolic factorization on matrix A and set up the
1823  nonzero data structures which are suitable for supernodal GENP. */
1824  if (Fact != SamePattern_SameRowPerm)
1825  {
1826  t = SuperLU_timer_();
1827  if (!(Glu_freeable = (Glu_freeable_t *)
1828  SUPERLU_MALLOC(sizeof(Glu_freeable_t))))
1829  ABORT("Malloc fails for Glu_freeable.");
1830 
1831  iinfo = symbfact(options, iam, AC, perm_c, etree,
1832  Glu_persist, Glu_freeable);
1833 
1834  stat.utime[SYMBFAC] = SuperLU_timer_() - t;
1835 
1836  if (iinfo < 0)
1837  {
1838  QuerySpace_dist(n, -iinfo, Glu_freeable,
1840  }
1841  else
1842  {
1843  if (!iam)
1844  {
1845  fprintf(stderr, "symbfact() error returns %d\n", iinfo);
1846  exit(-1);
1847  }
1848  }
1849  }
1850 
1851  /* Distribute the L and U factors onto the process grid. */
1852  t = SuperLU_timer_();
1853  /* dist_mem_use = */
1854  ddistribute(Fact, n, AC, Glu_freeable, LUstruct, grid);
1855  stat.utime[DIST] = SuperLU_timer_() - t;
1856 
1857  /* Deallocate storage used in symbolic factor. */
1858  if (Fact != SamePattern_SameRowPerm)
1859  {
1860  iinfo = symbfact_SubFree(Glu_freeable);
1861  SUPERLU_FREE(Glu_freeable);
1862  }
1863 
1864  /* Perform numerical factorization in parallel. */
1865  t = SuperLU_timer_();
1866  pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
1867  stat.utime[FACT] = SuperLU_timer_() - t;
1868  }
1869  else if (options->IterRefine)
1870  {
1871  /* options->Fact==FACTORED */
1872  /* Permute columns of A to form A*Pc' using the existing perm_c.
1873  NOTE: rows of A were previously permuted to Pc*A.
1874  */
1875  sp_colorder(options, A, perm_c, NULL, AC);
1876  } /* if !factored ... */
1877 
1878  if (*info!=0)
1879  {
1880  printf("Trouble in pdgstrf. Info=%i\n",*info);
1881  if (*info>0)
1882  {
1883  printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
1884  printf("been completed, but the factor U is exactly singular,\n");
1885  printf("and division by zero will occur if it is used to solve a\n");
1886  printf("system of equations.\n");
1887  }
1888  else
1889  {
1890  printf("The %i-th argument had an illegal value.\n", *info);
1891  }
1892  }
1893 
1894  /* Print the statistics. */
1895  if ((doc==0) && (!iam))
1896  {
1897  printf("\nstats after setup....\n");
1898  PStatPrint(options, &stat, grid);
1899  }
1900 
1901  /* ------------------------------------------------------------
1902  Set up data structure.
1903  ------------------------------------------------------------*/
1904  superlu_data->A = A;
1905  superlu_data->AC = AC;
1906  superlu_data->options = options;
1907  superlu_data->ScalePermstruct = ScalePermstruct;
1908  superlu_data->LUstruct = LUstruct;
1909  superlu_data->colequ = colequ;
1910  superlu_data->rowequ = rowequ;
1911  superlu_data->anorm = anorm;
1912  *data = superlu_data;
1913 
1914  } /* End of setup */
1915 
1916 
1917  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1918  PERFORM A SOLVE
1919  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1920  if (opt_flag==2)
1921  {
1922  /* Get pointer to the grid */
1923  superlu_data = (superlu_dist_data *)*data;
1924  grid = superlu_data->grid;
1925 
1926  /* Bail out if I do not belong in the grid. */
1927  iam = grid->iam;
1928  if (iam >= nprow * npcol)
1929  {
1930  return;
1931  }
1932 
1933  if ((doc==0)&&(!iam))
1934  {
1935  printf("\nPerforming SuperLU_DIST solve\n");
1936  }
1937 
1938  /* ------------------------------------------------------------
1939  Set other pointers to data structure.
1940  ------------------------------------------------------------*/
1941  A = superlu_data->A;
1942  AC = superlu_data->AC;
1943  options = superlu_data->options;
1944  ScalePermstruct = superlu_data->ScalePermstruct;
1945  LUstruct = superlu_data->LUstruct;
1946  colequ = superlu_data->colequ;
1947  rowequ = superlu_data->rowequ;
1948  anorm = superlu_data->anorm;
1949 
1950  /* Initialization. */
1951  Astore = A->Store;
1952  colptr = Astore->colptr;
1953  rowind = Astore->rowind;
1954  R = ScalePermstruct->R;
1955  C = ScalePermstruct->C;
1956  perm_r = ScalePermstruct->perm_r;
1957  perm_c = ScalePermstruct->perm_c;
1958 
1959  /* Local control paramaters */
1960  Fact = options->Fact;
1961  factored = (Fact == FACTORED);
1962  Equil = (!factored && options->Equil == YES);
1963  notran = (options->Trans == NOTRANS);
1964 
1965 
1966  /* ------------------------------------------------------------
1967  Compute the solution matrix X.
1968  ------------------------------------------------------------*/
1969  if (!(b_work = doubleMalloc_dist(n)))
1970  {
1971  ABORT("Malloc fails for b_work[]");
1972  }
1973 
1974  /* ------------------------------------------------------------
1975  Scale the right-hand side if equilibration was performed.
1976  ------------------------------------------------------------*/
1977  if (notran)
1978  {
1979  if (rowequ)
1980  {
1981  b_col = B;
1982  for (j = 0; j < nrhs; ++j)
1983  {
1984  for (i = 0; i < m; ++i)
1985  {
1986  b_col[i] *= R[i];
1987  }
1988  b_col += ldb;
1989  }
1990  }
1991  }
1992  else if (colequ)
1993  {
1994  b_col = B;
1995  for (j = 0; j < nrhs; ++j)
1996  {
1997  for (i = 0; i < m; ++i)
1998  {
1999  b_col[i] *= C[i];
2000  }
2001  b_col += ldb;
2002  }
2003  }
2004 
2005  /* ------------------------------------------------------------
2006  Permute the right-hand side to form Pr*B.
2007  ------------------------------------------------------------*/
2008  if ((int) options->RowPerm != (int) NO)
2009  {
2010  if (notran)
2011  {
2012  b_col = B;
2013  for (j = 0; j < nrhs; ++j)
2014  {
2015  for (i = 0; i < m; ++i)
2016  {
2017  b_work[perm_r[i]] = b_col[i];
2018  }
2019  for (i = 0; i < m; ++i)
2020  {
2021  b_col[i] = b_work[i];
2022  }
2023  b_col += ldb;
2024  }
2025  }
2026  }
2027 
2028 
2029  /* ------------------------------------------------------------
2030  Permute the right-hand side to form Pc*B.
2031  ------------------------------------------------------------*/
2032  if (notran)
2033  {
2034  b_col = B;
2035  for (j = 0; j < nrhs; ++j)
2036  {
2037  for (i = 0; i < m; ++i)
2038  {
2039  b_work[perm_c[i]] = b_col[i];
2040  }
2041  for (i = 0; i < m; ++i)
2042  {
2043  b_col[i] = b_work[i];
2044  }
2045  b_col += ldb;
2046  }
2047  }
2048 
2049 
2050  /* Save a copy of the right-hand side. */
2051  ldx = ldb;
2052  if (!(X = doubleMalloc_dist(((size_t)ldx) * nrhs)))
2053  {
2054  ABORT("Malloc fails for X[]");
2055  }
2056 
2057  x_col = X;
2058  b_col = B;
2059  for (j = 0; j < nrhs; ++j)
2060  {
2061  for (i = 0; i < ldb; ++i)
2062  {
2063  x_col[i] = b_col[i];
2064  }
2065  x_col += ldx;
2066  b_col += ldb;
2067  }
2068 
2069  /* ------------------------------------------------------------
2070  Solve the linear system.
2071  ------------------------------------------------------------*/
2072  pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, &stat, info);
2073  if (*info!=0)
2074  {
2075  printf("Trouble in pdgstrs_Bglobal. Info=%i\n",*info);
2076  printf("The %i-th argument had an illegal value.\n", *info);
2077  }
2078 
2079  /* ------------------------------------------------------------
2080  Use iterative refinement to improve the computed solution and
2081  compute error bounds and backward error estimates for it.
2082  ------------------------------------------------------------*/
2083  if (options->IterRefine)
2084  {
2085  /* Improve the solution by iterative refinement. */
2086  t = SuperLU_timer_();
2087 
2088  /* Storage for backward error */
2089  if (!(berr = doubleMalloc_dist(nrhs)))
2090  {
2091  ABORT("Malloc fails for berr[].");
2092  }
2093 
2094  pdgsrfs_ABXglobal(n, AC, anorm, LUstruct, grid, B, ldb,
2095  X, ldx, nrhs, berr, &stat, info);
2096  if (*info!=0)
2097  {
2098  printf("Trouble in pdgsrfs_ABXglobal. Info=%i\n",*info);
2099  printf("The %i-th argument had an illegal value.\n", *info);
2100  }
2101  stat.utime[REFINE] = SuperLU_timer_() - t;
2102  }
2103 
2104  /* Print the statistics. */
2105  if ((doc==0) && (!iam))
2106  {
2107  printf("\nstats after solve....\n");
2108  PStatPrint(options, &stat, grid);
2109  }
2110 
2111  /* Permute the solution matrix X <= Pc'*X. */
2112  for (j = 0; j < nrhs; j++)
2113  {
2114  b_col = &B[j*ldb];
2115  x_col = &X[j*ldx];
2116  for (i = 0; i < n; ++i)
2117  {
2118  b_col[i] = x_col[perm_c[i]];
2119  }
2120  }
2121 
2122  /* Transform the solution matrix X to a solution of the original system
2123  before the equilibration. */
2124  if (notran)
2125  {
2126  if (colequ)
2127  {
2128  b_col = B;
2129  for (j = 0; j < nrhs; ++j)
2130  {
2131  for (i = 0; i < n; ++i)
2132  {
2133  b_col[i] *= C[i];
2134  }
2135  b_col += ldb;
2136  }
2137  }
2138  }
2139  else if (rowequ)
2140  {
2141  b_col = B;
2142  for (j = 0; j < nrhs; ++j)
2143  {
2144  for (i = 0; i < n; ++i)
2145  {
2146  b_col[i] *= R[i];
2147  }
2148  b_col += ldb;
2149  }
2150  }
2151 
2152 
2153  /* Clean up memory */
2154  if (options->IterRefine)
2155  {
2156  SUPERLU_FREE(berr);
2157  }
2158  SUPERLU_FREE(b_work);
2159  SUPERLU_FREE(X);
2160 
2161  } /* End of solve */
2162 
2163  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2164  PERFORM CLEAN UP OF MEMORY
2165  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
2166  if (opt_flag==3)
2167  {
2168  /* Get pointer to the process grid */
2169  superlu_data = (superlu_dist_data *)*data;
2170  grid = superlu_data->grid;
2171 
2172  /* Bail out if I do not belong in the grid. */
2173  iam = grid->iam;
2174  if (iam >= nprow * npcol) goto out;
2175  if ((doc==0)&&(!iam))
2176  {
2177  printf("\nCleaning up memory allocated for SuperLU_DIST\n");
2178  }
2179 
2180  /* ------------------------------------------------------------
2181  Set pointers to the data structure.
2182  ------------------------------------------------------------*/
2183  A = superlu_data->A;
2184  AC = superlu_data->AC;
2185  options = superlu_data->options;
2186  ScalePermstruct = superlu_data->ScalePermstruct;
2187  LUstruct = superlu_data->LUstruct;
2188 
2189  /* -------------------------------
2190  Set the other pointers required
2191  -------------------------------*/
2192  R = ScalePermstruct->R;
2193  C = ScalePermstruct->C;
2194 
2195  /* Local control paramaters */
2196  Fact = options->Fact;
2197  factored = (Fact == FACTORED);
2198  Equil = (!factored && options->Equil == YES);
2199  rowequ = colequ = FALSE;
2200  if (factored || (Fact == SamePattern_SameRowPerm && Equil))
2201  {
2202  rowequ = (ScalePermstruct->DiagScale == ROW) ||
2203  (ScalePermstruct->DiagScale == BOTH);
2204  colequ = (ScalePermstruct->DiagScale == COL) ||
2205  (ScalePermstruct->DiagScale == BOTH);
2206  }
2207  else
2208  {
2209  rowequ = colequ = FALSE;
2210  }
2211 
2212  /* Deallocate storage. */
2213  if (Equil && Fact != SamePattern_SameRowPerm)
2214  {
2215  switch (ScalePermstruct->DiagScale)
2216  {
2217  case NOEQUIL:
2218  SUPERLU_FREE(R);
2219  SUPERLU_FREE(C);
2220  break;
2221  case ROW:
2222  SUPERLU_FREE(C);
2223  break;
2224  case COL:
2225  SUPERLU_FREE(R);
2226  break;
2227  default:
2228  /* Apparently this one is ok */
2229  /* printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL); */
2230  /* ABORT("Never get here."); */
2231  break;
2232  }
2233  }
2234  if (!factored || (factored && options->IterRefine))
2235  {
2236  Destroy_CompCol_Permuted_dist(AC);
2237  }
2238 
2239  /* Free storage */
2240  ScalePermstructFree(ScalePermstruct);
2241  Destroy_LU(n, grid, LUstruct);
2242  LUstructFree(LUstruct);
2243  //Destroy_CompRowLoc_Matrix_dist(&A);
2244  // Only destroy the store part of the matrix
2245  Destroy_SuperMatrix_Store_dist(A);
2246 
2247  /* Deallocate memory */
2248  SUPERLU_FREE(A);
2249  SUPERLU_FREE(AC);
2250  SUPERLU_FREE(ScalePermstruct);
2251  SUPERLU_FREE(LUstruct);
2252  SUPERLU_FREE(options);
2253 
2254  /* Release the superlu process grid. */
2255 out:
2256  superlu_gridexit(grid);
2257 
2258  SUPERLU_FREE(grid);
2259  SUPERLU_FREE(superlu_data);
2260  }
2261 
2262  /* Free storage */
2263  PStatFree(&stat);
2264 
2265  return;
2266 }
2267 
2268 
if(e >s)
Definition: cfortran.h:572
cstr elem_len * i
Definition: cfortran.h:603
char t
Definition: cfortran.h:568
mem_usage_t Memory_usage
Definition: superlu.c:44
int Memory_usage_has_been_recorded
Definition: superlu.c:47
SuperMatrix * AC
Definition: superlu_dist.c:50
ScalePermstruct_t * ScalePermstruct
Definition: superlu_dist.c:51
gridinfo_t * grid
Definition: superlu_dist.c:48
SOLVEstruct_t * SOLVEstruct
Definition: superlu_dist.c:53
SuperMatrix * A
Definition: superlu_dist.c:49
superlu_options_t * options
Definition: superlu_dist.c:54
LUstruct_t * LUstruct
Definition: superlu_dist.c:52
double get_total_memory_usage_in_bytes_dist()
Definition: superlu_dist.c:93
struct MemoryStatisticsStorage symbolic_memory_statistics_storage
double get_lu_factor_memory_usage_in_bytes_dist()
Definition: superlu_dist.c:77
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
Definition: superlu_dist.c:120
void get_memory_usage_in_bytes_dist(double *lu_factor_memory, double *total_memory)
Definition: superlu_dist.c:109
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
Definition: superlu_dist.c:161
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n_in, int nnz_in, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)