Parallelization

MPI-GMRES 2D

To launch this script, use for example:

ff-mpirun -np 12 MPIGMRES2D.edp -d 1 -k 1 -gmres 2 -n 50

//usage :
//ff-mpirun [mpi parameter] MPIGMRES2d.edp [-glut ffglut] [-n N] [-k K] [-d D] [-ns] [-gmres [0|1]
//arguments:
//-glut ffglut : to see graphicaly the process
//-n N: set the mesh cube split NxNxN
//-d D: set debug flag D must be one for mpiplot
//-k K: to refined by K all element
//-ns: remove script dump
//-gmres
//0: use iterative schwarz algo.
//1: Algo GMRES on residu of schwarz algo
//2: DDM GMRES
//3: DDM GMRES with coarse grid preconditionner (Good one)

load "MPICG"
load "medit"
load "metis"
include "getARGV.idp"
include "MPIplot.idp"
include "MPIGMRESmacro.idp"

searchMethod = 0; //more safe seach algo (warning can be very expensive in case of lot of ouside point)
assert(version >= 3.11); //need at least v3.11
real[int] ttt(10);
int ittt=0;
macro settt {ttt[ittt++] = mpiWtime();}//

// Arguments
verbosity = getARGV("-vv", 0);
int vdebug = getARGV("-d", 1);
int ksplit = getARGV("-k", 3);
int nloc = getARGV("-n", 10);
string sff = getARGV("-p", "");
int gmres = getARGV("-gmres", 2);
bool dplot = getARGV("-dp", 0);
int nC = getARGV("-N", max(nloc/10, 4));

if (mpirank==0 && verbosity){
    cout << "ARGV: ";
    for (int i = 0; i < ARGV.n; ++i)
        cout << ARGV[i] << " ";
    cout << endl;
}

if(mpirank==0 && verbosity)
cout << " vdebug: " << vdebug << ", kspilt "<< ksplit << ", nloc "<< nloc << ", sff "<< sff << "." << endl;

// Parameters
int withplot = 0;
bool withmetis = 1;
bool RAS = 1;
string sPk = "P2-2gd";
func Pk = P2;
int sizeoverlaps = 1; //size of overlap
int[int] l111 = [1, 1, 1, 1]; //mesh labels

// MPI function
func bool plotMPIall(mesh &Th, real[int] &u, string cm){
    if(vdebug)
        PLOTMPIALL(mesh, Pk, Th, u, {cmm=cm, nbiso=20, fill=1, dim=3, value=1});
    return 1;
}

// MPI
mpiComm comm(mpiCommWorld,0,0); //trick : make a no split mpiWorld

int npart = mpiSize(comm); //total number of partion
int ipart = mpiRank(comm); //current partition number

int njpart = 0; //Number of part with intersection (a jpart) with ipart without ipart
int[int] jpart(npart); //list of jpart
if(ipart==0)
    cout << " Final N = " << ksplit*nloc << ", nloc = " << nloc << ", split = " << ksplit << endl;
settt

// Mesh
mesh Thg = square(nloc, nloc, label=l111);
mesh ThC = square(nC, nC, label=l111);// Coarse mesh

mesh Thi, Thin; //with overlap, without olverlap

// Fespace
fespace Phg(Thg, P0);
Phg part;

fespace Vhg(Thg, P1);
Vhg unssd; //boolean function: 1 in the subdomain, 0 elswhere

fespace VhC(ThC, P1); // of the coarse problem

// Partitioning
{
    int[int] nupart(Thg.nt);
    nupart = 0;
    if (npart > 1 && ipart == 0)
        metisdual(nupart, Thg, npart);

    broadcast(processor(0, comm), nupart);
    for(int i = 0; i < nupart.n; ++i)
        part[][i] = nupart[i];
}

if (withplot > 1)
    plot(part, fill=1, cmm="dual", wait=1);

// Overlapping partition
Phg suppi = abs(part-ipart) < 0.1;

Thin = trunc(Thg, suppi>0, label=10); // non-overlapping mesh, interfaces have label 10
int nnn = sizeoverlaps*2;// to be sure
AddLayers(Thg, suppi[], nnn, unssd[]); //see above! suppi and unssd are modified
unssd[] *= nnn; //to put value nnn a 0
real nnn0 = nnn - sizeoverlaps + 0.001;
Thi = trunc(Thg, unssd>nnn0, label=10); //overlapping mesh, interfaces have label 10

settt

// Fespace
fespace Vhi(Thi,P1);
int npij = npart;
Vhi[int] pij(npij); //local partition of unit + pii
Vhi pii;

real nnn1 = +0.001;
{
    /*
    construction of the partition of the unit,
    let phi_i P1 FE function 1 on Thin and zero ouside of Thi and positive
    the partition is build with
    p_i = phi_i/ \sum phi_i

    to build the partition of one domain i
    we nned to find all j such that supp(phi_j) \cap supp(phi_j) is not empty
    <=> int phi_j
    */
    //build a local mesh of thii such that all computation of the unit partition are
    //exact in thii
    mesh Thii = trunc(Thg, unssd>nnn1, label=10); //overlapping mesh, interfaces have label 10

    {
        //find all j mes (supp(p_j) cap supp(p_i)) >0
        //compute all phi_j on Thii
        //remark: supp p_i include in Thi

        // Fespace
        fespace Phii(Thii, P0);
        fespace Vhii(Thii, P1);
        Vhi sumphi = 0;
        Vhii phii = 0;

        jpart = 0;
        njpart = 0;
        int nlayer = RAS ? 1 : sizeoverlaps;
        if (ipart == 0)
            cout << "nlayer = " << nlayer << endl;
        pii = max(unssd-nnn+nlayer, 0.)/nlayer;
        if(dplot)
            plot(pii, wait=1, cmm=" 0000");
        sumphi[] += pii[];
        if(dplot)
            plot(sumphi, wait=1, cmm=" summ 0000");

        real epsmes = 1e-10*Thii.area;
        for (int i = 0; i < npart; ++i)
            if (i != ipart){
            Phii suppii = abs(i-part) < 0.2;
            if (suppii[].max > 0.5){
                AddLayers(Thii, suppii[], nlayer, phii[]);
                assert(phii[].min >= 0);
                real interij = int2d(Thi)(phii);
                if (interij > epsmes){
                    pij[njpart] = abs(phii);
                    if(vdebug > 2)
                        cout << " ***** " << int2d(Thi)(real(pij[njpart])<0) << " " <<pij[njpart][].min << " " << phii[].min << endl;
                    assert(int2d(Thi)(real(pij[njpart]) < 0) == 0);
                    if(dplot)
                        plot(pij[njpart], wait=1, cmm=" j = "+ i + " " + njpart);
                    sumphi[] += pij[njpart][];
                    if(dplot)
                        plot(sumphi, wait=1, cmm=" sum j = "+ i + " " + njpart);
                    jpart[njpart++] = i;
                }
            }
        }

        if(dplot)
            plot(sumphi, wait=1, dim=3, cmm="sum ", fill=1);
        pii[] = pii[] ./ sumphi[];
        for (int j = 0; j < njpart; ++j)
            pij[j][] = pij[j][] ./ sumphi[];
        jpart.resize(njpart);
        for (int j = 0; j < njpart; ++j)
            assert(pij[j][].max <= 1);
        {
            cout << ipart << " number of jpart " << njpart << " : ";
            for (int j = 0; j < njpart; ++j)
                cout << jpart[j] << " ";
            cout << endl;
        }
        sumphi[] = pii[];
        for (int j = 0; j < njpart; ++j)
            sumphi[] += pij[j][];
        if(vdebug > 2)
            cout << "sum min " << sumphi[].min << " " << sumphi[].max << endl;
        assert(sumphi[].min > 1.-1e-6 && sumphi[].max < 1.+1e-6);
    }
} //Thii is remove here
// end of the construction of the local partition of the unity ...
// on Thi
if (ipart == 0)
    cout << "End build partition" << endl;

// Computation of number of intersection
//here pii and the pij is the local partition of the unit on
//Thi (mesh with overlap)
if ( dplot){
    plot(Thi, wait=1);
    for(int j = 0; j < njpart; ++j)
        plot(pij[j], cmm=" j="+j, wait=1);
}

//Partition of the unity on Thi
//computation of message
//all j > we have to receive
//data on intersection of the support of pij[0] and pij[j]
settt

if(vdebug)
    plotMPIall(Thi, pii[], "pi_i");

mesh[int] aThij(njpart);
matrix Pii;
matrix[int] sMj(njpart); //M of send to j
matrix[int] rMj(njpart); //M to recv from j
fespace Whi(Thi, Pk);
mesh Thij = Thi;
fespace Whij(Thij, Pk);//

//construction of the mesh intersect i,j part
for(int jp = 0; jp < njpart; ++jp)
    aThij[jp] = trunc(Thi, pij[jp] > 1e-6, label=10); //mesh of the supp of pij

for(int jp = 0; jp < njpart; ++jp)
    aThij[jp] = trunc(aThij[jp], 1, split=ksplit);

Thi = trunc(Thi, 1, split=ksplit);

settt

if (ipart == 0)
    cout << "End build mesh intersection" << endl;

// Construction of transfert matrix
{
    Whi wpii = pii;
    Pii = wpii[];
    for(int jp = 0; jp < njpart; ++jp){
        int j = jpart[jp];
        Thij = aThij[jp];
        matrix I = interpolate(Whij, Whi); //Whji <- Whi
        sMj[jp] = I*Pii; //Whi -> s Whij
        rMj[jp] = interpolate(Whij, Whi, t=1); //Whji -> Whi
        if(vdebug > 10){
            {Whi uuu=1; Whij vvv=-1; vvv[]+=I*uuu[]; cout << jp << " %%% " << vvv[].linfty << endl; assert(vvv[].linfty < 1e-6);}
            {Whi uuu=1; Whij vvv=-1; vvv[]+=rMj[jp]'*uuu[]; cout << jp << " ### " << vvv[].linfty << endl; assert(vvv[].linfty < 1e-6);}
        }
    }
}
if (ipart == 0)
    cout << "End build transfert matrix" << endl;

// Allocate array of send and recv data
InitU(njpart, Whij, Thij, aThij, Usend) //initU(n, Vh, Th, aTh, U)
InitU(njpart, Whij, Thij, aThij, Vrecv)
if (ipart == 0)
    cout << "End init data for send/revc" << endl;

Whi ui, vi;

func bool Update(real[int] &ui, real[int] &vi){
    for(int j = 0; j < njpart; ++j)
        Usend[j][] = sMj[j]*ui;
    SendRecvUV(comm, jpart, Usend, Vrecv)
    vi = Pii*ui;
    for(int j = 0; j < njpart; ++j)
        vi += rMj[j]*Vrecv[j][];
    return true;
}

// Definition of the Problem
func G = x*0.1;
func F = 1.;
macro grad(u) [dx(u),dy(u)] //
varf vBC (U, V) = on(1, U=G);
varf vPb (U, V) = int2d(Thi)(grad(U)'*grad(V)) + int2d(Thi)(F*V) + on(10, U=0) + on(1, U=G);
varf vPbC (U, V) = int2d(ThC)(grad(U)'*grad(V)) + on(1, U=0);
varf vPbon (U, V) = on(10, U=1) + on(1, U=1);
varf vPbon10only (U, V) = on(10, U=1) + on(1, U=0);
//remark the order is important we want 0 part on 10 and 1

matrix Ai = vPb(Whi, Whi, solver=sparsesolver);
matrix AC, Rci, Pci;

if (mpiRank(comm) == 0)
    AC = vPbC(VhC, VhC, solver=sparsesolver);

Pci = interpolate(Whi, VhC);
Rci = Pci'*Pii;

real[int] onG10 = vPbon10only(0, Whi);
real[int] onG = vPbon(0, Whi);
real[int] Bi=vPb(0, Whi);

int kiter = -1;

func bool CoarseSolve(real[int] &V, real[int] &U, mpiComm &comm){
    //solving the coarse probleme
    real[int] Uc(Rci.n), Bc(Uc.n);
    Uc = Rci*U;
    mpiReduce(Uc, Bc, processor(0, comm), mpiSUM);
    if (mpiRank(comm) == 0)
        Uc = AC^-1*Bc;
    broadcast(processor(0, comm), Uc);
    V = Pci*Uc;
}

func real[int] DJ (real[int] &U){
    ++kiter;
    real[int] V(U.n);
    V = Ai*U;
    V = onG10 ? 0.: V; //remove internal boundary
    return V;
}

func real[int] PDJ (real[int] &U){
    real[int] V(U.n);

    real[int] b = onG10 ? 0. : U;
    V = Ai^-1*b;
    Update(V, U);
    return U;
}

func real[int] PDJC (real[int] &U){
    real[int] V(U.n);
    CoarseSolve(V, U, comm);
    V = -V; //-C2*Uo
    U += Ai*V; //U = (I-A C2) Uo
    real[int] b = onG10 ? 0. : U;
    U = Ai^-1*b; // (C1( I -A C2) Uo
    V = U -V;
    Update(V, U);
    return U;
}

func real[int] DJ0(real[int] &U){
    ++kiter;
    real[int] V(U.n);
    real[int] b = onG .* U;
    b = onG ? b : Bi ;
    V = Ai^-1*b;
    Update(V, U);
    V -= U;
    return V;
}

Whi u = 0, v;
{ //verification
    Whi u = 1, v;
    Update(u[], v[]);
    u[] -= v[];
    assert(u[].linfty < 1e-6);
}

settt
u[] = vBC(0, Whi, tgv=1); //set u with tgv BC value

real epss = 1e-6;
int rgmres = 0;
if (gmres == 1){
    rgmres = MPIAffineGMRES(DJ0, u[], veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
    real[int] b = onG .* u[];
    b = onG ? b : Bi;
    v[] = Ai^-1*b;
    Update(v[], u[]);
}
else if (gmres == 2)
    rgmres = MPILinearGMRES(DJ, precon=PDJ, u[], Bi, veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
else if (gmres == 3)
    rgmres = MPILinearGMRES(DJ, precon=PDJC, u[], Bi, veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
else //algo Shwarz for demo
    for(int iter = 0; iter < 10; ++iter){
        real[int] b = onG .* u[];
        b = onG ? b : Bi ;
        v[] = Ai^-1*b;

        Update(v[], u[]);
        if(vdebug)
            plotMPIall(Thi, u[], "u-"+iter);
        v[] -= u[];

        real err = v[].linfty;
        real umax = u[].max;
        real[int] aa = [err, umax], bb(2);
        mpiAllReduce(aa, bb, comm, mpiMAX);
        real errg = bb[0];
        real umaxg = bb[1];

        if (ipart == 0)
            cout << ipart << " err = " << errg << " u. max " << umaxg << endl;
        if (errg < 1e-5) break;
    }

if (vdebug)
    plotMPIall(Thi, u[], "u-final");

settt

real errg = 1, umaxg;
{
    real umax = u[].max, umaxg;
    real[int] aa = [umax], bb(1);
    mpiAllReduce(aa, bb, comm, mpiMAX);
    errg = bb[0];
    if (ipart == 0)
        cout << "umax global = " << bb[0] << " Wtime = " << (ttt[ittt-1]-ttt[ittt-2]) << " s " << " " << kiter << endl;
}

if (sff != ""){
    ofstream ff(sff+".txt", append);
    cout << " ++++ ";
    cout << mpirank << "/" << mpisize << " k=" << ksplit << " n= " << nloc << " " << sizeoverlaps << " it= " << kiter;
    for (int i = 1; i < ittt; ++i)
        cout << " " << ttt[i]-ttt[i-1] << " ";
    cout << epss << " " << Ai.nbcoef << " " << Ai.n << endl;

    /*
    1 mpirank
    2 mpisize
    3 ksplit
    4 nloc
    5 sizeoverlaps
    6 kiter
    7 mesh & part build
    8 build the partion
    9 build mesh, transfere , and the fine mesh ..
    10 build the matrix, the trans matrix, factorizatioon
    11 GMRES
    */

    ff << mpirank << " " << mpisize << " " << sPk << " ";
    ff << ksplit << " " << nloc << " " << sizeoverlaps << " " << kiter;
    for (int i = 1; i < ittt; ++i)
        ff << " " << ttt[i]-ttt[i-1] << " ";
    ff << epss << " " << Ai.nbcoef << " " << Ai.n << " " << gmres << endl;
}

../_images/MPIGMRES2D.png — Fig. 222 Results

MPI-GMRES 3D

Todo

todo

Direct solvers

load "MUMPS_FreeFem"
//default solver: real-> MUMPS, complex -> MUMPS
load "real_SuperLU_DIST_FreeFem"
default solver: real-> SuperLU_DIST, complex -> MUMPS
load "real_pastix_FreeFem"
//default solver: real-> pastix, complex -> MUMPS

// Solving with pastix
{
    matrix A =
        [[1, 2, 2, 1, 1],
        [ 2, 12, 0, 10 , 10],
        [ 2, 0, 1, 0, 2],
        [ 1, 10, 0, 22, 0.],
        [ 1, 10, 2, 0., 22]];

    real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
    b = A*xx;
    cout << "b = " << b << endl;
    cout << "xx = " << xx << endl;

    set(A, solver=sparsesolver, datafilename="ffpastix_iparm_dparm.txt");
    cout << "solve" << endl;
    x = A^-1*b;
    cout << "b = " << b << endl;
    cout << "x = " << endl;
    cout << x << endl;
    di = xx - x;
    if (mpirank == 0){
        cout << "x-xx = " << endl;
        cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
    }
}

// Solving with SuperLU_DIST
realdefaulttoSuperLUdist();
//default solver: real-> SuperLU_DIST, complex -> MUMPS
{
    matrix A =
        [[1, 2, 2, 1, 1],
        [ 2, 12, 0, 10 , 10],
        [ 2, 0, 1, 0, 2],
        [ 1, 10, 0, 22, 0.],
        [ 1, 10, 2, 0., 22]];

    real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
    b = A*xx;
    cout << "b = " << b << endl;
    cout << "xx = " << xx << endl;

    set(A, solver=sparsesolver, datafilename="ffsuperlu_dist_fileparam.txt");
    cout << "solve" << endl;
    x = A^-1*b;
    cout << "b = " << b << endl;
    cout << "x = " << endl;
    cout << x << endl;
    di = xx - x;
    if (mpirank == 0){
        cout << "x-xx = " << endl;
        cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
    }
}

// Solving with MUMPS
defaulttoMUMPS();
//default solver: real-> MUMPS, complex -> MUMPS
{
    matrix A =
        [[1, 2, 2, 1, 1],
        [ 2, 12, 0, 10 , 10],
        [ 2, 0, 1, 0, 2],
        [ 1, 10, 0, 22, 0.],
        [ 1, 10, 2, 0., 22]];

    real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
    b = A*xx;
    cout << "b = " << b << endl;
    cout << "xx = " << xx << endl;

    set(A, solver=sparsesolver, datafilename="ffmumps_fileparam.txt");
    cout << "solving solution" << endl;
    x = A^-1*b;
    cout << "b = " << b << endl;
    cout << "x = " << endl;
    cout << x << endl;
    di = xx - x;
    if (mpirank == 0){
        cout << "x-xx = " << endl;
        cout << "Linf = " << di.linfty << ", L2 " << di.l2 << endl;
    }
}

Solver MUMPS

load "MUMPS_FreeFem"

// Parameters
int[int] ICNTL(40); //declaration of ICNTL parameter for MUMPS

//get value of ICNTL from file
if (mpirank == 0){
    ifstream ff("ffmumps_fileparam.txt");
    string line;
    getline(ff, line);
    getline(ff, line);
    for (int iii = 0; iii < 40; iii++){
        ff >> ICNTL[iii];
        getline(ff, line);
    }
}

broadcast(processor(0), ICNTL);

// Given data of MUMPS solver in array lparams(SYM, PAR, ICNTL)
// There is no symmetric storage for a matrix associated with a sparse solver.
// Therefore, the matrix will be considered unsymmetric for parallel sparse solver even if symmetric.
{
    // Problem
    int SYM = 0;
    int PAR = 1;
    matrix A =
        [
            [40, 0, 45, 0, 0],
            [0, 12, 0, 0, 0],
            [0, 0, 40, 0, 0],
            [12, 0, 0, 22, 0],
            [0, 0, 20, 0, 22]
        ];

    // Construction of integer parameter for MUMPS
    int[int] MumpsLParams(42);
    MumpsLParams[0] = SYM;
    MumpsLParams[1] = PAR;
    for (int ii = 0; ii < 40; ii++)
        MumpsLParams[ii+2] = ICNTL[ii]; //ICNTL begin with index 0 here

    real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
    b = A*xx;
    if (mpirank == 0)
        cout << "xx = " << xx << endl;

    set(A, solver=sparsesolver, lparams=MumpsLParams); //we take the default value for CNTL MUMPS parameter

    // Solve
    if (mpirank == 0)
        cout << "Solve" << endl;
    x = A^-1*b;
    if (mpirank == 0)
        cout << "b = " << b << endl;
    if (mpirank == 0)
        cout << "x = " << endl; cout << x << endl;
    di = xx-x;
    if (mpirank == 0){
        cout << "x-xx = " << endl;
        cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
    }
}

// Read parameter of MUMPS solver in file ffmumps_fileparam.txt
{
    // Problem
    matrix A =
        [
            [40, 0, 45, 0, 0],
            [0, 12, 0, 0 , 0],
            [0, 0, 40, 0, 0],
            [12, 0, 0, 22, 0],
            [0, 0, 20, 0, 22]
        ];

    real[int] xx = [1, 32, 45, 7000, 2], x(5), b(5), di(5);
    b = A*xx;
    if (mpirank == 0){
        cout << "b = " << b << endl;
        cout << "xx = " << xx << endl;
    }

    set(A, solver=sparsesolver, datafilename="ffmumps_fileparam.txt");

    // Solve
    if (mpirank == 0)
        cout << "Solve" << endl;
    x = A^-1*b;

    if (mpirank == 0){
        cout << "b = " << b << endl;
        cout << "x = " << x << endl;
    }
    di = xx-x;
    if (mpirank == 0){
        cout << "x-xx = " << endl;
        cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
    }
}

Solver superLU_DIST

Todo

write code (SuperLU_DIST seems to have a bug)

Solver PaStiX

Todo

write code (PaStiX seems to have a bug)

Here you can use the search bar to search in all FreeFEM examples instead

The list of all matching examples on the right is updated for each new query

Clicking on a search hit or on an example on the right brings up the corresponding script from GitHub

You can also filter the list on the right by selecting one or more intersecting categories on the left tree

Parallelization

MPI-GMRES 2D

MPI-GMRES 3D

Direct solvers

Solver MUMPS

Solver superLU_DIST

Solver PaStiX