29 #ifdef USING_OOMPH_SUPERLU_DIST
30 #include "oomph_superlu_dist_3.0.h"
32 #include<superlu_defs.h>
33 #include<superlu_ddefs.h>
37 #include<supermatrix.h>
38 #include<old_colamd.h>
110 double* total_memory)
121 int* cr_index,
int* cr_start,
double** cc_values,
122 int** cc_index,
int** cc_start)
124 dCompRow_to_CompCol(nrow,ncol,nnz,cr_values,cr_index,cr_start,cc_values,
162 int n,
int nnz_local,
163 int nrow_local,
int first_row,
164 double *values,
int *col_index,
165 int *row_start,
double *b,
166 int nprow,
int npcol,
167 int doc,
void **data,
int *info,
171 superlu_options_t *options;
174 ScalePermstruct_t *ScalePermstruct;
175 LUstruct_t *LUstruct;
176 SOLVEstruct_t *SOLVEstruct;
185 int_t *rowptr=NULL; int_t *colind;
186 int_t job, rowequ, colequ, iinfo, need_value,
i, j, irow, icol;
187 int_t m_loc, fst_row, nnz, nnz_loc;
188 int_t *colptr, *rowind;
189 NRformat_loc *Astore;
195 Glu_persist_t *Glu_persist;
196 Glu_freeable_t *Glu_freeable=NULL;
200 double *a=NULL;
double *X, *b_col;
202 double *C, *
R, *C1, *R1, *x_col;
203 double amax,
t, colcnd, rowcnd;
double anorm=0.0;
204 char equed[1], norm[1];
208 int_t Equil, factored, notran, permc_spec;
235 grid = (gridinfo_t *) SUPERLU_MALLOC(
sizeof(gridinfo_t));
236 superlu_gridinit(comm, nprow, npcol, grid);
237 superlu_data->
grid = grid;
241 if (iam >= nprow * npcol)
return;
244 options = (superlu_options_t *) SUPERLU_MALLOC(
sizeof(superlu_options_t));
245 A = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
247 (ScalePermstruct_t *) SUPERLU_MALLOC(
sizeof(ScalePermstruct_t));
248 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(
sizeof(LUstruct_t));
249 SOLVEstruct = (SOLVEstruct_t *) SUPERLU_MALLOC(
sizeof(SOLVEstruct_t));
252 dCreate_CompRowLoc_Matrix_dist(A,m,n,nnz_local,nrow_local,first_row,
253 values,col_index,row_start,
254 SLU_NR_loc,SLU_D,SLU_GE);
257 set_default_options_dist(options);
260 options->Trans = NOTRANS;
265 options->RowPerm=LargeDiag;
269 options->ColPerm=MMD_AT_PLUS_A;
272 if (allow_permutations==0)
274 options->ColPerm=NATURAL;
275 options->RowPerm=NATURAL;
283 options->IterRefine = SLU_DOUBLE;
288 options->PrintStat = YES;
292 options->PrintStat = NO;
297 if ((!iam)&&(
doc==0))
299 printf(
"\nPerforming SuperLU_DIST setup\n");
300 printf(
"Process grid\t%d X %d\n", grid->nprow, grid->npcol);
301 print_options_dist(options);
305 ScalePermstructInit(m, n, ScalePermstruct);
306 LUstructInit(m, n, LUstruct);
309 Glu_persist = LUstruct->Glu_persist;
310 Astore = (NRformat_loc *) A->Store;
311 nnz_loc = Astore->nnz_loc;
312 m_loc = Astore->m_loc;
313 fst_row = Astore->fst_row;
315 rowptr = Astore->rowptr;
316 colind = Astore->colind;
319 Fact = options->Fact;
320 factored = (Fact == FACTORED);
321 Equil = (!factored && options->Equil == YES);
322 notran = (options->Trans == NOTRANS);
323 rowequ = colequ = FALSE;
324 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
326 rowequ = (ScalePermstruct->DiagScale == ROW) ||
327 (ScalePermstruct->DiagScale == BOTH);
328 colequ = (ScalePermstruct->DiagScale == COL) ||
329 (ScalePermstruct->DiagScale == BOTH);
333 rowequ = colequ = FALSE;
338 if (Fact < 0 || Fact > FACTORED)
342 else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
346 else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
350 else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
354 else if (options->IterRefine == SLU_EXTRA)
357 fprintf(stderr,
"Extra precise iterative refinement yet to support.\n");
359 else if (A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
360 || A->Dtype != SLU_D || A->Mtype != SLU_GE)
364 else if (ldb < m_loc)
374 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
377 printf(
"Error in options.\n");
381 printf(
"Error in matrix.\n");
385 printf(
"ldb < m_loc\n");
389 printf(
"nrhs < 0\n");
395 perm_r = ScalePermstruct->perm_r;
396 perm_c = ScalePermstruct->perm_c;
397 etree = LUstruct->etree;
398 R = ScalePermstruct->R;
399 C = ScalePermstruct->C;
406 switch (ScalePermstruct->DiagScale)
409 if (!(
R = (
double *) doubleMalloc_dist(m)))
410 ABORT(
"Malloc fails for R[].");
411 if (!(C = (
double *) doubleMalloc_dist(n)))
412 ABORT(
"Malloc fails for C[].");
413 ScalePermstruct->R =
R;
414 ScalePermstruct->C = C;
417 if (!(C = (
double *) doubleMalloc_dist(n)))
418 ABORT(
"Malloc fails for C[].");
419 ScalePermstruct->C = C;
422 if (!(
R = (
double *) doubleMalloc_dist(m)))
423 ABORT(
"Malloc fails for R[].");
424 ScalePermstruct->R =
R;
427 printf(
"diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
428 ABORT(
"Never get here.");
438 t = SuperLU_timer_();
440 if (Fact == SamePattern_SameRowPerm)
443 switch (ScalePermstruct->DiagScale)
449 for (j = 0; j < m_loc; ++j)
451 for (
i = rowptr[j];
i < rowptr[j+1]; ++
i)
459 for (j = 0; j < m_loc; ++j)
461 for (
i = rowptr[j];
i < rowptr[j+1]; ++
i)
470 for (j = 0; j < m_loc; ++j)
472 for (
i = rowptr[j];
i < rowptr[j+1]; ++
i)
475 a[
i] *=
R[irow] * C[icol];
486 pdgsequ(A,
R, C, &rowcnd, &colcnd, &amax, &iinfo, grid);
489 pdlaqgs(A,
R, C, rowcnd, colcnd, amax, equed);
491 if (lsame_(equed,
"R"))
493 ScalePermstruct->DiagScale = rowequ = ROW;
495 else if (lsame_(equed,
"C"))
497 ScalePermstruct->DiagScale = colequ = COL;
499 else if (lsame_(equed,
"B"))
501 ScalePermstruct->DiagScale = BOTH;
506 ScalePermstruct->DiagScale = NOEQUIL;
509 stat.utime[EQUIL] = SuperLU_timer_() -
t;
521 if (Fact != SamePattern_SameRowPerm)
523 need_value = (options->RowPerm == LargeDiag);
524 pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA);
525 GAstore = (NCformat *) GA.Store;
526 colptr = GAstore->colptr;
527 rowind = GAstore->rowind;
529 if (need_value) a_GA = GAstore->nzval;
530 else assert(GAstore->nzval == NULL);
536 if ((
int) options->RowPerm != (
int) NO)
538 t = SuperLU_timer_();
539 if (Fact != SamePattern_SameRowPerm)
541 if (options->RowPerm == MY_PERMR)
545 for (
i = 0;
i < colptr[n]; ++
i)
548 rowind[
i] = perm_r[irow];
558 if (!(R1 = doubleMalloc_dist(m)))
560 ABORT(
"SUPERLU_MALLOC fails for R1[]");
562 if (!(C1 = doubleMalloc_dist(n)))
564 ABORT(
"SUPERLU_MALLOC fails for C1[]");
571 dldperm(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1);
573 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
574 if (job == 5 && Equil)
576 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
577 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
582 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
583 if (job == 5 && Equil)
585 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
586 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
594 for (
i = 0;
i < n; ++
i)
602 for (j = 0; j < m_loc; ++j)
604 for (
i = rowptr[j];
i < rowptr[j+1]; ++
i)
607 a[
i] *= R1[irow] * C1[icol];
615 for (
i = 0;
i < m; ++
i)
622 for (
i = 0;
i < m; ++
i)
629 for (
i = 0;
i < n; ++
i)
636 for (
i = 0;
i < n; ++
i)
642 ScalePermstruct->DiagScale = BOTH;
648 for (j = 0; j < n; ++j)
650 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
653 rowind[
i] = perm_r[irow];
662 for (j = 0; j < n; ++j)
664 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
667 rowind[
i] = perm_r[irow];
673 t = SuperLU_timer_() -
t;
674 stat.utime[ROWPERM] =
t;
680 for (
i = 0;
i <m; ++
i) perm_r[
i] =
i;
684 if (!factored || options->IterRefine)
688 *(
unsigned char *)norm =
'1';
690 *(
unsigned char *)norm =
'I';
691 anorm = pdlangs(norm, A, grid);
700 t = SuperLU_timer_();
709 permc_spec = options->ColPerm;
710 if (permc_spec != MY_PERMC && Fact == DOFACT)
712 get_perm_c_dist(iam, permc_spec, &GA, perm_c);
715 stat.utime[COLPERM] = SuperLU_timer_() -
t;
721 if (Fact != SamePattern_SameRowPerm)
723 int_t *GACcolbeg, *GACcolend, *GACrowind;
725 sp_colorder(options, &GA, perm_c, etree, &GAC);
728 GACstore = GAC.Store;
729 GACcolbeg = GACstore->colbeg;
730 GACcolend = GACstore->colend;
731 GACrowind = GACstore->rowind;
732 for (j = 0; j < n; ++j)
734 for (
i = GACcolbeg[j];
i < GACcolend[j]; ++
i)
737 GACrowind[
i] = perm_c[irow];
743 t = SuperLU_timer_();
744 if (!(Glu_freeable = (Glu_freeable_t *)
745 SUPERLU_MALLOC(
sizeof(Glu_freeable_t))))
747 ABORT(
"Malloc fails for Glu_freeable.");
751 iinfo = symbfact(options, iam, &GAC, perm_c, etree,
752 Glu_persist, Glu_freeable);
754 stat.utime[SYMBFAC] = SuperLU_timer_() -
t;
758 QuerySpace_dist(n, -iinfo, Glu_freeable,
765 fprintf(stderr,
"symbfact() error returns %d\n", iinfo);
772 for (j = 0; j < nnz_loc; ++j)
774 colind[j] = perm_c[colind[j]];
780 t = SuperLU_timer_();
782 pddistribute(Fact, n, A, ScalePermstruct,
783 Glu_freeable, LUstruct, grid);
784 stat.utime[DIST] = SuperLU_timer_() -
t;
787 if (Fact != SamePattern_SameRowPerm)
789 iinfo = symbfact_SubFree(Glu_freeable);
790 SUPERLU_FREE(Glu_freeable);
794 t = SuperLU_timer_();
795 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
796 stat.utime[FACT] = SuperLU_timer_() -
t;
799 if (Fact != SamePattern_SameRowPerm)
801 Destroy_CompCol_Matrix_dist(&GA);
802 Destroy_CompCol_Permuted_dist(&GAC);
808 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
811 printf(
"U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
812 printf(
"been completed, but the factor U is exactly singular,\n");
813 printf(
"and division by zero will occur if it is used to solve a\n");
814 printf(
"system of equations.\n");
818 printf(
"The %i-th argument had an illegal value.\n", *info);
825 if (options->SolveInitialized == NO)
827 dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,
831 if (options->IterRefine)
833 if (options->RefineInitialized == NO || Fact == DOFACT)
836 if (options->RefineInitialized)
838 pdgsmv_finalize(SOLVEstruct->gsmv_comm);
840 pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,
841 SOLVEstruct->gsmv_comm);
843 options->RefineInitialized = YES;
848 if ((
doc==0) && (!iam))
850 printf(
"\nstats after setup....\n");
851 PStatPrint(options, &stat, grid);
858 superlu_data->
options = options;
862 superlu_data->
colequ = colequ;
863 superlu_data->
rowequ = rowequ;
864 superlu_data->
anorm = anorm;
865 *data = superlu_data;
877 grid = superlu_data->
grid;
881 if (iam >= nprow * npcol)
886 if ((
doc==0)&&(!iam))
888 printf(
"\nPerforming SuperLU_DIST solve\n");
895 options = superlu_data->
options;
899 colequ = superlu_data->
colequ;
900 rowequ = superlu_data->
rowequ;
901 anorm = superlu_data->
anorm;
904 Astore = (NRformat_loc *) A->Store;
905 nnz_loc = Astore->nnz_loc;
906 m_loc = Astore->m_loc;
907 fst_row = Astore->fst_row;
908 colind = Astore->colind;
909 R = ScalePermstruct->R;
910 C = ScalePermstruct->C;
913 Fact = options->Fact;
914 factored = (Fact == FACTORED);
915 Equil = (!factored && options->Equil == YES);
916 notran = (options->Trans == NOTRANS);
926 for (j = 0; j < nrhs; ++j)
929 for (
i = 0;
i < m_loc; ++
i)
941 for (j = 0; j < nrhs; ++j)
944 for (
i = 0;
i < m_loc; ++
i)
955 if (!(X = doubleMalloc_dist(((
size_t)ldx) * nrhs)))
957 ABORT(
"Malloc fails for X[]");
961 for (j = 0; j < nrhs; ++j)
963 for (
i = 0;
i < m_loc; ++
i)
975 pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc,
976 fst_row, ldb, nrhs, SOLVEstruct, &stat, info);
980 printf(
"Trouble in pdgstrs. Info=%i\n",*info);
981 printf(
"The %i-th argument had an illegal value.\n", *info);
988 if (options->IterRefine)
991 int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;
994 t = SuperLU_timer_();
995 if (options->RefineInitialized == NO || Fact == DOFACT)
1001 SUPERLU_FREE(colind_gsmv);
1003 if (!(it = intMalloc_dist(nnz_loc)))
1005 ABORT(
"Malloc fails for colind_gsmv[]");
1007 colind_gsmv = SOLVEstruct->A_colind_gsmv = it;
1008 for (
i = 0;
i < nnz_loc; ++
i)
1010 colind_gsmv[
i] = colind[
i];
1013 else if (Fact == SamePattern || Fact == SamePattern_SameRowPerm)
1019 for (
i = 0;
i < m_loc; ++
i)
1023 for (j = rowptr[
i]; j < rowptr[
i+1]; ++j)
1026 p = SOLVEstruct->row_to_proc[jcol];
1030 at = a[k]; a[k] = a[j]; a[j] = at;
1038 for (
i = 0;
i < nnz_loc; ++
i)
1040 colind[
i] = colind_gsmv[
i];
1045 if (!(berr = doubleMalloc_dist(nrhs)))
1047 ABORT(
"Malloc fails for berr[].");
1050 pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,
1051 B, ldb, X, ldx, nrhs, SOLVEstruct, berr, &stat, info);
1053 stat.utime[REFINE] = SuperLU_timer_() -
t;
1058 printf(
"Trouble in pdgsrfs. Info=%i\n",*info);
1059 printf(
"The %i-th argument had an illegal value.\n", *info);
1063 if ((
doc==0) && (!iam))
1065 printf(
"\nstats after solve....\n");
1066 PStatPrint(options, &stat, grid);
1070 pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,
1071 SOLVEstruct->inv_perm_c,
1072 X, ldx,
B, ldb, nrhs, grid);
1081 for (j = 0; j < nrhs; ++j)
1084 for (
i = 0;
i < m_loc; ++
i)
1086 b_col[
i] *= C[irow];
1096 for (j = 0; j < nrhs; ++j)
1099 for (
i = 0;
i < m_loc; ++
i)
1101 b_col[
i] *=
R[irow];
1109 if (options->IterRefine)
1124 grid = superlu_data->
grid;
1127 int iam = grid->iam;
1128 if (iam >= nprow * npcol)
goto out;
1129 if ((
doc==0)&&(!iam))
1131 printf(
"\nCleaning up memory allocated for SuperLU_DIST\n");
1137 A = superlu_data->
A;
1138 options = superlu_data->
options;
1146 R = ScalePermstruct->R;
1147 C = ScalePermstruct->C;
1150 Fact = options->Fact;
1151 factored = (Fact == FACTORED);
1152 Equil = (!factored && options->Equil == YES);
1153 notran = (options->Trans == NOTRANS);
1156 if (Equil && Fact != SamePattern_SameRowPerm)
1158 switch (ScalePermstruct->DiagScale)
1179 ScalePermstructFree(ScalePermstruct);
1180 Destroy_LU(n, grid, LUstruct);
1181 LUstructFree(LUstruct);
1182 dSolveFinalize(options, SOLVEstruct);
1186 Destroy_SuperMatrix_Store_dist(A);
1190 SUPERLU_FREE(ScalePermstruct);
1191 SUPERLU_FREE(LUstruct);
1192 SUPERLU_FREE(SOLVEstruct);
1193 SUPERLU_FREE(options);
1197 superlu_gridexit(grid);
1200 SUPERLU_FREE(superlu_data);
1247 int n_in,
int nnz_in,
1249 int *row_index,
int *col_start,
1250 double *b,
int nprow,
int npcol,
1251 int doc,
void **data,
int *info,
1255 superlu_options_t *options;
1259 ScalePermstruct_t *ScalePermstruct;
1260 LUstruct_t *LUstruct;
1269 int_t job, rowequ, colequ, iinfo,
i, j, irow;
1271 int_t *colptr, *rowind;
1272 int_t Equil, factored, notran, permc_spec;
1275 Glu_persist_t *Glu_persist;
1276 Glu_freeable_t *Glu_freeable=NULL;
1280 double *a, *X, *b_col;
1282 double *C, *
R, *C1, *R1, *b_work, *x_col;
1283 double amax,
t, colcnd, rowcnd;
double anorm=0.0;
1284 char equed[1], norm[1];
1318 grid = (gridinfo_t *) SUPERLU_MALLOC(
sizeof(gridinfo_t));
1319 superlu_gridinit(comm, nprow, npcol, grid);
1320 superlu_data->
grid = grid;
1324 if (iam >= nprow * npcol)
1330 options = (superlu_options_t *) SUPERLU_MALLOC(
sizeof(superlu_options_t));
1331 A = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
1332 AC = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
1334 (ScalePermstruct_t *) SUPERLU_MALLOC(
sizeof(ScalePermstruct_t));
1335 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(
sizeof(LUstruct_t));
1338 set_default_options_dist(options);
1341 options->Trans = NOTRANS;
1346 options->RowPerm=LargeDiag;
1350 options->ColPerm=MMD_AT_PLUS_A;
1353 if (allow_permutations==0)
1355 options->ColPerm=NATURAL;
1356 options->RowPerm=NATURAL;
1361 options->IterRefine = SLU_DOUBLE;
1366 options->PrintStat = YES;
1370 options->PrintStat = NO;
1374 if ((!iam)&&(
doc==0))
1376 printf(
"\nPerforming SuperLU_DIST setup\n");
1377 printf(
"Process grid\t%d X %d\n", grid->nprow, grid->npcol);
1378 print_options_dist(options);
1383 dCreate_CompCol_Matrix_dist(A,m,n,nnz,values,row_index,col_start,
1384 SLU_NC,SLU_D,SLU_GE);
1387 ScalePermstructInit(m, n, ScalePermstruct);
1388 LUstructInit(m, n, LUstruct);
1392 Fact = options->Fact;
1393 if (Fact < 0 || Fact > FACTORED)
1397 else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
1401 else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
1405 else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
1409 else if (options->IterRefine == SLU_EXTRA)
1412 fprintf(stderr,
"Extra precise iterative refinement yet to support.\n");
1414 else if (A->nrow != A->ncol || A->nrow < 0 ||
1415 A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE)
1419 else if (ldb < A->nrow)
1429 printf(
"Trouble in pdgstrf. Info=%i\n",-*info);
1432 printf(
"Error in options.\n");
1436 printf(
"Error in matrix.\n");
1440 printf(
"ldb < A->nrow\n");
1444 printf(
"nrhs < 0\n");
1450 factored = (Fact == FACTORED);
1451 Equil = (!factored && options->Equil == YES);
1452 notran = (options->Trans == NOTRANS);
1457 colptr = Astore->colptr;
1458 rowind = Astore->rowind;
1459 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
1461 rowequ = (ScalePermstruct->DiagScale == ROW) ||
1462 (ScalePermstruct->DiagScale == BOTH);
1463 colequ = (ScalePermstruct->DiagScale == COL) ||
1464 (ScalePermstruct->DiagScale == BOTH);
1468 rowequ = colequ = FALSE;
1471 perm_r = ScalePermstruct->perm_r;
1472 perm_c = ScalePermstruct->perm_c;
1473 etree = LUstruct->etree;
1474 R = ScalePermstruct->R;
1475 C = ScalePermstruct->C;
1476 Glu_persist = LUstruct->Glu_persist;
1480 switch (ScalePermstruct->DiagScale)
1483 if (!(
R = (
double *) doubleMalloc_dist(m)))
1484 ABORT(
"Malloc fails for R[].");
1485 if (!(C = (
double *) doubleMalloc_dist(n)))
1486 ABORT(
"Malloc fails for C[].");
1487 ScalePermstruct->R =
R;
1488 ScalePermstruct->C = C;
1491 if (!(C = (
double *) doubleMalloc_dist(n)))
1492 ABORT(
"Malloc fails for C[].");
1493 ScalePermstruct->C = C;
1496 if (!(
R = (
double *) doubleMalloc_dist(m)))
1497 ABORT(
"Malloc fails for R[].");
1498 ScalePermstruct->R =
R;
1501 printf(
"diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
1502 ABORT(
"Never get here.");
1512 t = SuperLU_timer_();
1514 if (Fact == SamePattern_SameRowPerm)
1517 switch (ScalePermstruct->DiagScale)
1522 for (j = 0; j < n; ++j)
1524 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
1532 for (j = 0; j < n; ++j)
1534 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
1541 for (j = 0; j < n; ++j)
1543 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
1546 a[
i] *=
R[irow] * C[j];
1557 dgsequ_dist(A,
R, C, &rowcnd, &colcnd, &amax, &iinfo);
1559 MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1562 MPI_Bcast(
R, m, MPI_DOUBLE, 0, grid->comm);
1563 MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1564 MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1565 MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1566 MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1574 fprintf(stderr,
"The %d-th row of A is exactly zero\n",
1579 fprintf(stderr,
"The %d-th column of A is exactly zero\n",
1588 MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1591 MPI_Bcast(
R, m, MPI_DOUBLE, 0, grid->comm);
1592 MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1593 MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1594 MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1595 MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1599 ABORT(
"DGSEQU failed\n");
1604 dlaqgs_dist(A,
R, C, rowcnd, colcnd, amax, equed);
1605 if (lsame_(equed,
"R"))
1607 ScalePermstruct->DiagScale = rowequ = ROW;
1609 else if (lsame_(equed,
"C"))
1611 ScalePermstruct->DiagScale = colequ = COL;
1613 else if (lsame_(equed,
"B"))
1615 ScalePermstruct->DiagScale = BOTH;
1621 ScalePermstruct->DiagScale = NOEQUIL;
1625 stat.utime[EQUIL] = SuperLU_timer_() -
t;
1632 if ((
int) options->RowPerm != (
int) NO)
1634 t = SuperLU_timer_();
1636 if (Fact == SamePattern_SameRowPerm
1637 || options->RowPerm == MY_PERMR)
1642 for (
i = 0;
i < colptr[n]; ++
i)
1645 rowind[
i] = perm_r[irow];
1654 if (!(R1 = (
double *) SUPERLU_MALLOC(m *
sizeof(
double))))
1655 ABORT(
"SUPERLU_MALLOC fails for R1[]");
1656 if (!(C1 = (
double *) SUPERLU_MALLOC(n *
sizeof(
double))))
1657 ABORT(
"SUPERLU_MALLOC fails for C1[]");
1663 dldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1);
1665 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1666 if (job == 5 && Equil)
1668 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1669 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1674 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1675 if (job == 5 && Equil)
1677 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1678 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1686 for (
i = 0;
i < n; ++
i)
1691 for (j = 0; j < n; ++j)
1693 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
1696 a[
i] *= R1[irow] * C1[j];
1697 rowind[
i] = perm_r[irow];
1704 for (
i = 0;
i < m; ++
i)
1711 for (
i = 0;
i < m; ++
i)
1718 for (
i = 0;
i < n; ++
i)
1725 for (
i = 0;
i < n; ++
i)
1731 ScalePermstruct->DiagScale = BOTH;
1732 rowequ = colequ = 1;
1739 for (
i = colptr[0];
i < colptr[n]; ++
i)
1742 rowind[
i] = perm_r[irow];
1752 for (j = 0; j < n; ++j)
1754 for (
i = colptr[j];
i < colptr[j+1]; ++
i)
1757 rowind[
i] = perm_r[irow];
1763 t = SuperLU_timer_() -
t;
1764 stat.utime[ROWPERM] =
t;
1767 if (!factored || options->IterRefine)
1772 *(
unsigned char *)norm =
'1';
1776 *(
unsigned char *)norm =
'I';
1778 anorm = dlangs_dist(norm, A);
1788 t = SuperLU_timer_();
1797 permc_spec = options->ColPerm;
1798 if (permc_spec != MY_PERMC && Fact == DOFACT)
1801 get_perm_c_dist(iam, permc_spec, A, perm_c);
1808 sp_colorder(options, A, perm_c, etree, AC);
1811 ACstore = AC->Store;
1812 for (j = 0; j < n; ++j)
1814 for (
i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++
i)
1816 irow = ACstore->rowind[
i];
1817 ACstore->rowind[
i] = perm_c[irow];
1820 stat.utime[COLPERM] = SuperLU_timer_() -
t;
1824 if (Fact != SamePattern_SameRowPerm)
1826 t = SuperLU_timer_();
1827 if (!(Glu_freeable = (Glu_freeable_t *)
1828 SUPERLU_MALLOC(
sizeof(Glu_freeable_t))))
1829 ABORT(
"Malloc fails for Glu_freeable.");
1831 iinfo = symbfact(options, iam, AC, perm_c, etree,
1832 Glu_persist, Glu_freeable);
1834 stat.utime[SYMBFAC] = SuperLU_timer_() -
t;
1838 QuerySpace_dist(n, -iinfo, Glu_freeable,
1845 fprintf(stderr,
"symbfact() error returns %d\n", iinfo);
1852 t = SuperLU_timer_();
1854 ddistribute(Fact, n, AC, Glu_freeable, LUstruct, grid);
1855 stat.utime[DIST] = SuperLU_timer_() -
t;
1858 if (Fact != SamePattern_SameRowPerm)
1860 iinfo = symbfact_SubFree(Glu_freeable);
1861 SUPERLU_FREE(Glu_freeable);
1865 t = SuperLU_timer_();
1866 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
1867 stat.utime[FACT] = SuperLU_timer_() -
t;
1869 else if (options->IterRefine)
1875 sp_colorder(options, A, perm_c, NULL, AC);
1880 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
1883 printf(
"U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
1884 printf(
"been completed, but the factor U is exactly singular,\n");
1885 printf(
"and division by zero will occur if it is used to solve a\n");
1886 printf(
"system of equations.\n");
1890 printf(
"The %i-th argument had an illegal value.\n", *info);
1895 if ((
doc==0) && (!iam))
1897 printf(
"\nstats after setup....\n");
1898 PStatPrint(options, &stat, grid);
1904 superlu_data->
A = A;
1905 superlu_data->
AC = AC;
1906 superlu_data->
options = options;
1909 superlu_data->
colequ = colequ;
1910 superlu_data->
rowequ = rowequ;
1911 superlu_data->
anorm = anorm;
1912 *data = superlu_data;
1924 grid = superlu_data->
grid;
1928 if (iam >= nprow * npcol)
1933 if ((
doc==0)&&(!iam))
1935 printf(
"\nPerforming SuperLU_DIST solve\n");
1941 A = superlu_data->
A;
1942 AC = superlu_data->
AC;
1943 options = superlu_data->
options;
1946 colequ = superlu_data->
colequ;
1947 rowequ = superlu_data->
rowequ;
1948 anorm = superlu_data->
anorm;
1952 colptr = Astore->colptr;
1953 rowind = Astore->rowind;
1954 R = ScalePermstruct->R;
1955 C = ScalePermstruct->C;
1956 perm_r = ScalePermstruct->perm_r;
1957 perm_c = ScalePermstruct->perm_c;
1960 Fact = options->Fact;
1961 factored = (Fact == FACTORED);
1962 Equil = (!factored && options->Equil == YES);
1963 notran = (options->Trans == NOTRANS);
1969 if (!(b_work = doubleMalloc_dist(n)))
1971 ABORT(
"Malloc fails for b_work[]");
1982 for (j = 0; j < nrhs; ++j)
1984 for (
i = 0;
i < m; ++
i)
1995 for (j = 0; j < nrhs; ++j)
1997 for (
i = 0;
i < m; ++
i)
2008 if ((
int) options->RowPerm != (
int) NO)
2013 for (j = 0; j < nrhs; ++j)
2015 for (
i = 0;
i < m; ++
i)
2017 b_work[perm_r[
i]] = b_col[
i];
2019 for (
i = 0;
i < m; ++
i)
2021 b_col[
i] = b_work[
i];
2035 for (j = 0; j < nrhs; ++j)
2037 for (
i = 0;
i < m; ++
i)
2039 b_work[perm_c[
i]] = b_col[
i];
2041 for (
i = 0;
i < m; ++
i)
2043 b_col[
i] = b_work[
i];
2052 if (!(X = doubleMalloc_dist(((
size_t)ldx) * nrhs)))
2054 ABORT(
"Malloc fails for X[]");
2059 for (j = 0; j < nrhs; ++j)
2061 for (
i = 0;
i < ldb; ++
i)
2063 x_col[
i] = b_col[
i];
2072 pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, &stat, info);
2075 printf(
"Trouble in pdgstrs_Bglobal. Info=%i\n",*info);
2076 printf(
"The %i-th argument had an illegal value.\n", *info);
2083 if (options->IterRefine)
2086 t = SuperLU_timer_();
2089 if (!(berr = doubleMalloc_dist(nrhs)))
2091 ABORT(
"Malloc fails for berr[].");
2094 pdgsrfs_ABXglobal(n, AC, anorm, LUstruct, grid,
B, ldb,
2095 X, ldx, nrhs, berr, &stat, info);
2098 printf(
"Trouble in pdgsrfs_ABXglobal. Info=%i\n",*info);
2099 printf(
"The %i-th argument had an illegal value.\n", *info);
2101 stat.utime[REFINE] = SuperLU_timer_() -
t;
2105 if ((
doc==0) && (!iam))
2107 printf(
"\nstats after solve....\n");
2108 PStatPrint(options, &stat, grid);
2112 for (j = 0; j < nrhs; j++)
2116 for (
i = 0;
i < n; ++
i)
2118 b_col[
i] = x_col[perm_c[
i]];
2129 for (j = 0; j < nrhs; ++j)
2131 for (
i = 0;
i < n; ++
i)
2142 for (j = 0; j < nrhs; ++j)
2144 for (
i = 0;
i < n; ++
i)
2154 if (options->IterRefine)
2158 SUPERLU_FREE(b_work);
2170 grid = superlu_data->
grid;
2174 if (iam >= nprow * npcol)
goto out;
2175 if ((
doc==0)&&(!iam))
2177 printf(
"\nCleaning up memory allocated for SuperLU_DIST\n");
2183 A = superlu_data->
A;
2184 AC = superlu_data->
AC;
2185 options = superlu_data->
options;
2192 R = ScalePermstruct->R;
2193 C = ScalePermstruct->C;
2196 Fact = options->Fact;
2197 factored = (Fact == FACTORED);
2198 Equil = (!factored && options->Equil == YES);
2199 rowequ = colequ = FALSE;
2200 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
2202 rowequ = (ScalePermstruct->DiagScale == ROW) ||
2203 (ScalePermstruct->DiagScale == BOTH);
2204 colequ = (ScalePermstruct->DiagScale == COL) ||
2205 (ScalePermstruct->DiagScale == BOTH);
2209 rowequ = colequ = FALSE;
2213 if (Equil && Fact != SamePattern_SameRowPerm)
2215 switch (ScalePermstruct->DiagScale)
2234 if (!factored || (factored && options->IterRefine))
2236 Destroy_CompCol_Permuted_dist(AC);
2240 ScalePermstructFree(ScalePermstruct);
2241 Destroy_LU(n, grid, LUstruct);
2242 LUstructFree(LUstruct);
2245 Destroy_SuperMatrix_Store_dist(A);
2250 SUPERLU_FREE(ScalePermstruct);
2251 SUPERLU_FREE(LUstruct);
2252 SUPERLU_FREE(options);
2256 superlu_gridexit(grid);
2259 SUPERLU_FREE(superlu_data);
int Memory_usage_has_been_recorded
ScalePermstruct_t * ScalePermstruct
SOLVEstruct_t * SOLVEstruct
superlu_options_t * options
double get_total_memory_usage_in_bytes_dist()
struct MemoryStatisticsStorage symbolic_memory_statistics_storage
double get_lu_factor_memory_usage_in_bytes_dist()
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
void get_memory_usage_in_bytes_dist(double *lu_factor_memory, double *total_memory)
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n_in, int nnz_in, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)