Parallelization
MPI-GMRES 2D
To launch this script, use for example:
1ff-mpirun -np 12 MPIGMRES2D.edp -d 1 -k 1 -gmres 2 -n 50
1//usage :
2//ff-mpirun [mpi parameter] MPIGMRES2d.edp [-glut ffglut] [-n N] [-k K] [-d D] [-ns] [-gmres [0|1]
3//arguments:
4//-glut ffglut : to see graphicaly the process
5//-n N: set the mesh cube split NxNxN
6//-d D: set debug flag D must be one for mpiplot
7//-k K: to refined by K all element
8//-ns: remove script dump
9//-gmres
10//0: use iterative schwarz algo.
11//1: Algo GMRES on residu of schwarz algo
12//2: DDM GMRES
13//3: DDM GMRES with coarse grid preconditionner (Good one)
14
15load "MPICG"
16load "medit"
17load "metis"
18include "getARGV.idp"
19include "MPIplot.idp"
20include "MPIGMRESmacro.idp"
21
22searchMethod = 0; //more safe seach algo (warning can be very expensive in case of lot of ouside point)
23assert(version >= 3.11); //need at least v3.11
24real[int] ttt(10);
25int ittt=0;
26macro settt {ttt[ittt++] = mpiWtime();}//
27
28// Arguments
29verbosity = getARGV("-vv", 0);
30int vdebug = getARGV("-d", 1);
31int ksplit = getARGV("-k", 3);
32int nloc = getARGV("-n", 10);
33string sff = getARGV("-p", "");
34int gmres = getARGV("-gmres", 2);
35bool dplot = getARGV("-dp", 0);
36int nC = getARGV("-N", max(nloc/10, 4));
37
38if (mpirank==0 && verbosity){
39 cout << "ARGV: ";
40 for (int i = 0; i < ARGV.n; ++i)
41 cout << ARGV[i] << " ";
42 cout << endl;
43}
44
45if(mpirank==0 && verbosity)
46cout << " vdebug: " << vdebug << ", kspilt "<< ksplit << ", nloc "<< nloc << ", sff "<< sff << "." << endl;
47
48// Parameters
49int withplot = 0;
50bool withmetis = 1;
51bool RAS = 1;
52string sPk = "P2-2gd";
53func Pk = P2;
54int sizeoverlaps = 1; //size of overlap
55int[int] l111 = [1, 1, 1, 1]; //mesh labels
56
57// MPI function
58func bool plotMPIall(mesh &Th, real[int] &u, string cm){
59 if(vdebug)
60 PLOTMPIALL(mesh, Pk, Th, u, {cmm=cm, nbiso=20, fill=1, dim=3, value=1});
61 return 1;
62}
63
64// MPI
65mpiComm comm(mpiCommWorld,0,0); //trick : make a no split mpiWorld
66
67int npart = mpiSize(comm); //total number of partion
68int ipart = mpiRank(comm); //current partition number
69
70int njpart = 0; //Number of part with intersection (a jpart) with ipart without ipart
71int[int] jpart(npart); //list of jpart
72if(ipart==0)
73 cout << " Final N = " << ksplit*nloc << ", nloc = " << nloc << ", split = " << ksplit << endl;
74settt
75
76// Mesh
77mesh Thg = square(nloc, nloc, label=l111);
78mesh ThC = square(nC, nC, label=l111);// Coarse mesh
79
80mesh Thi, Thin; //with overlap, without olverlap
81
82// Fespace
83fespace Phg(Thg, P0);
84Phg part;
85
86fespace Vhg(Thg, P1);
87Vhg unssd; //boolean function: 1 in the subdomain, 0 elswhere
88
89fespace VhC(ThC, P1); // of the coarse problem
90
91// Partitioning
92{
93 int[int] nupart(Thg.nt);
94 nupart = 0;
95 if (npart > 1 && ipart == 0)
96 metisdual(nupart, Thg, npart);
97
98 broadcast(processor(0, comm), nupart);
99 for(int i = 0; i < nupart.n; ++i)
100 part[][i] = nupart[i];
101}
102
103if (withplot > 1)
104 plot(part, fill=1, cmm="dual", wait=1);
105
106// Overlapping partition
107Phg suppi = abs(part-ipart) < 0.1;
108
109Thin = trunc(Thg, suppi>0, label=10); // non-overlapping mesh, interfaces have label 10
110int nnn = sizeoverlaps*2;// to be sure
111AddLayers(Thg, suppi[], nnn, unssd[]); //see above! suppi and unssd are modified
112unssd[] *= nnn; //to put value nnn a 0
113real nnn0 = nnn - sizeoverlaps + 0.001;
114Thi = trunc(Thg, unssd>nnn0, label=10); //overlapping mesh, interfaces have label 10
115
116settt
117
118// Fespace
119fespace Vhi(Thi,P1);
120int npij = npart;
121Vhi[int] pij(npij); //local partition of unit + pii
122Vhi pii;
123
124real nnn1 = +0.001;
125{
126 /*
127 construction of the partition of the unit,
128 let phi_i P1 FE function 1 on Thin and zero ouside of Thi and positive
129 the partition is build with
130 p_i = phi_i/ \sum phi_i
131
132 to build the partition of one domain i
133 we nned to find all j such that supp(phi_j) \cap supp(phi_j) is not empty
134 <=> int phi_j
135 */
136 //build a local mesh of thii such that all computation of the unit partition are
137 //exact in thii
138 mesh Thii = trunc(Thg, unssd>nnn1, label=10); //overlapping mesh, interfaces have label 10
139
140 {
141 //find all j mes (supp(p_j) cap supp(p_i)) >0
142 //compute all phi_j on Thii
143 //remark: supp p_i include in Thi
144
145 // Fespace
146 fespace Phii(Thii, P0);
147 fespace Vhii(Thii, P1);
148 Vhi sumphi = 0;
149 Vhii phii = 0;
150
151 jpart = 0;
152 njpart = 0;
153 int nlayer = RAS ? 1 : sizeoverlaps;
154 if (ipart == 0)
155 cout << "nlayer = " << nlayer << endl;
156 pii = max(unssd-nnn+nlayer, 0.)/nlayer;
157 if(dplot)
158 plot(pii, wait=1, cmm=" 0000");
159 sumphi[] += pii[];
160 if(dplot)
161 plot(sumphi, wait=1, cmm=" summ 0000");
162
163 real epsmes = 1e-10*Thii.area;
164 for (int i = 0; i < npart; ++i)
165 if (i != ipart){
166 Phii suppii = abs(i-part) < 0.2;
167 if (suppii[].max > 0.5){
168 AddLayers(Thii, suppii[], nlayer, phii[]);
169 assert(phii[].min >= 0);
170 real interij = int2d(Thi)(phii);
171 if (interij > epsmes){
172 pij[njpart] = abs(phii);
173 if(vdebug > 2)
174 cout << " ***** " << int2d(Thi)(real(pij[njpart])<0) << " " <<pij[njpart][].min << " " << phii[].min << endl;
175 assert(int2d(Thi)(real(pij[njpart]) < 0) == 0);
176 if(dplot)
177 plot(pij[njpart], wait=1, cmm=" j = "+ i + " " + njpart);
178 sumphi[] += pij[njpart][];
179 if(dplot)
180 plot(sumphi, wait=1, cmm=" sum j = "+ i + " " + njpart);
181 jpart[njpart++] = i;
182 }
183 }
184 }
185
186 if(dplot)
187 plot(sumphi, wait=1, dim=3, cmm="sum ", fill=1);
188 pii[] = pii[] ./ sumphi[];
189 for (int j = 0; j < njpart; ++j)
190 pij[j][] = pij[j][] ./ sumphi[];
191 jpart.resize(njpart);
192 for (int j = 0; j < njpart; ++j)
193 assert(pij[j][].max <= 1);
194 {
195 cout << ipart << " number of jpart " << njpart << " : ";
196 for (int j = 0; j < njpart; ++j)
197 cout << jpart[j] << " ";
198 cout << endl;
199 }
200 sumphi[] = pii[];
201 for (int j = 0; j < njpart; ++j)
202 sumphi[] += pij[j][];
203 if(vdebug > 2)
204 cout << "sum min " << sumphi[].min << " " << sumphi[].max << endl;
205 assert(sumphi[].min > 1.-1e-6 && sumphi[].max < 1.+1e-6);
206 }
207} //Thii is remove here
208// end of the construction of the local partition of the unity ...
209// on Thi
210if (ipart == 0)
211 cout << "End build partition" << endl;
212
213// Computation of number of intersection
214//here pii and the pij is the local partition of the unit on
215//Thi (mesh with overlap)
216if ( dplot){
217 plot(Thi, wait=1);
218 for(int j = 0; j < njpart; ++j)
219 plot(pij[j], cmm=" j="+j, wait=1);
220}
221
222//Partition of the unity on Thi
223//computation of message
224//all j > we have to receive
225//data on intersection of the support of pij[0] and pij[j]
226settt
227
228if(vdebug)
229 plotMPIall(Thi, pii[], "pi_i");
230
231mesh[int] aThij(njpart);
232matrix Pii;
233matrix[int] sMj(njpart); //M of send to j
234matrix[int] rMj(njpart); //M to recv from j
235fespace Whi(Thi, Pk);
236mesh Thij = Thi;
237fespace Whij(Thij, Pk);//
238
239//construction of the mesh intersect i,j part
240for(int jp = 0; jp < njpart; ++jp)
241 aThij[jp] = trunc(Thi, pij[jp] > 1e-6, label=10); //mesh of the supp of pij
242
243for(int jp = 0; jp < njpart; ++jp)
244 aThij[jp] = trunc(aThij[jp], 1, split=ksplit);
245
246Thi = trunc(Thi, 1, split=ksplit);
247
248settt
249
250if (ipart == 0)
251 cout << "End build mesh intersection" << endl;
252
253// Construction of transfert matrix
254{
255 Whi wpii = pii;
256 Pii = wpii[];
257 for(int jp = 0; jp < njpart; ++jp){
258 int j = jpart[jp];
259 Thij = aThij[jp];
260 matrix I = interpolate(Whij, Whi); //Whji <- Whi
261 sMj[jp] = I*Pii; //Whi -> s Whij
262 rMj[jp] = interpolate(Whij, Whi, t=1); //Whji -> Whi
263 if(vdebug > 10){
264 {Whi uuu=1; Whij vvv=-1; vvv[]+=I*uuu[]; cout << jp << " %%% " << vvv[].linfty << endl; assert(vvv[].linfty < 1e-6);}
265 {Whi uuu=1; Whij vvv=-1; vvv[]+=rMj[jp]'*uuu[]; cout << jp << " ### " << vvv[].linfty << endl; assert(vvv[].linfty < 1e-6);}
266 }
267 }
268}
269if (ipart == 0)
270 cout << "End build transfert matrix" << endl;
271
272// Allocate array of send and recv data
273InitU(njpart, Whij, Thij, aThij, Usend) //initU(n, Vh, Th, aTh, U)
274InitU(njpart, Whij, Thij, aThij, Vrecv)
275if (ipart == 0)
276 cout << "End init data for send/revc" << endl;
277
278Whi ui, vi;
279
280func bool Update(real[int] &ui, real[int] &vi){
281 for(int j = 0; j < njpart; ++j)
282 Usend[j][] = sMj[j]*ui;
283 SendRecvUV(comm, jpart, Usend, Vrecv)
284 vi = Pii*ui;
285 for(int j = 0; j < njpart; ++j)
286 vi += rMj[j]*Vrecv[j][];
287 return true;
288}
289
290// Definition of the Problem
291func G = x*0.1;
292func F = 1.;
293macro grad(u) [dx(u),dy(u)] //
294varf vBC (U, V) = on(1, U=G);
295varf vPb (U, V) = int2d(Thi)(grad(U)'*grad(V)) + int2d(Thi)(F*V) + on(10, U=0) + on(1, U=G);
296varf vPbC (U, V) = int2d(ThC)(grad(U)'*grad(V)) + on(1, U=0);
297varf vPbon (U, V) = on(10, U=1) + on(1, U=1);
298varf vPbon10only (U, V) = on(10, U=1) + on(1, U=0);
299//remark the order is important we want 0 part on 10 and 1
300
301matrix Ai = vPb(Whi, Whi, solver=sparsesolver);
302matrix AC, Rci, Pci;
303
304if (mpiRank(comm) == 0)
305 AC = vPbC(VhC, VhC, solver=sparsesolver);
306
307Pci = interpolate(Whi, VhC);
308Rci = Pci'*Pii;
309
310real[int] onG10 = vPbon10only(0, Whi);
311real[int] onG = vPbon(0, Whi);
312real[int] Bi=vPb(0, Whi);
313
314int kiter = -1;
315
316func bool CoarseSolve(real[int] &V, real[int] &U, mpiComm &comm){
317 //solving the coarse probleme
318 real[int] Uc(Rci.n), Bc(Uc.n);
319 Uc = Rci*U;
320 mpiReduce(Uc, Bc, processor(0, comm), mpiSUM);
321 if (mpiRank(comm) == 0)
322 Uc = AC^-1*Bc;
323 broadcast(processor(0, comm), Uc);
324 V = Pci*Uc;
325}
326
327func real[int] DJ (real[int] &U){
328 ++kiter;
329 real[int] V(U.n);
330 V = Ai*U;
331 V = onG10 ? 0.: V; //remove internal boundary
332 return V;
333}
334
335func real[int] PDJ (real[int] &U){
336 real[int] V(U.n);
337
338 real[int] b = onG10 ? 0. : U;
339 V = Ai^-1*b;
340 Update(V, U);
341 return U;
342}
343
344func real[int] PDJC (real[int] &U){
345 real[int] V(U.n);
346 CoarseSolve(V, U, comm);
347 V = -V; //-C2*Uo
348 U += Ai*V; //U = (I-A C2) Uo
349 real[int] b = onG10 ? 0. : U;
350 U = Ai^-1*b; // (C1( I -A C2) Uo
351 V = U -V;
352 Update(V, U);
353 return U;
354}
355
356func real[int] DJ0(real[int] &U){
357 ++kiter;
358 real[int] V(U.n);
359 real[int] b = onG .* U;
360 b = onG ? b : Bi ;
361 V = Ai^-1*b;
362 Update(V, U);
363 V -= U;
364 return V;
365}
366
367Whi u = 0, v;
368{ //verification
369 Whi u = 1, v;
370 Update(u[], v[]);
371 u[] -= v[];
372 assert(u[].linfty < 1e-6);
373}
374
375settt
376u[] = vBC(0, Whi, tgv=1); //set u with tgv BC value
377
378real epss = 1e-6;
379int rgmres = 0;
380if (gmres == 1){
381 rgmres = MPIAffineGMRES(DJ0, u[], veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
382 real[int] b = onG .* u[];
383 b = onG ? b : Bi;
384 v[] = Ai^-1*b;
385 Update(v[], u[]);
386}
387else if (gmres == 2)
388 rgmres = MPILinearGMRES(DJ, precon=PDJ, u[], Bi, veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
389else if (gmres == 3)
390 rgmres = MPILinearGMRES(DJ, precon=PDJC, u[], Bi, veps=epss, nbiter=300, comm=comm, dimKrylov=100, verbosity=ipart ? 0: 50);
391else //algo Shwarz for demo
392 for(int iter = 0; iter < 10; ++iter){
393 real[int] b = onG .* u[];
394 b = onG ? b : Bi ;
395 v[] = Ai^-1*b;
396
397 Update(v[], u[]);
398 if(vdebug)
399 plotMPIall(Thi, u[], "u-"+iter);
400 v[] -= u[];
401
402 real err = v[].linfty;
403 real umax = u[].max;
404 real[int] aa = [err, umax], bb(2);
405 mpiAllReduce(aa, bb, comm, mpiMAX);
406 real errg = bb[0];
407 real umaxg = bb[1];
408
409 if (ipart == 0)
410 cout << ipart << " err = " << errg << " u. max " << umaxg << endl;
411 if (errg < 1e-5) break;
412 }
413
414if (vdebug)
415 plotMPIall(Thi, u[], "u-final");
416
417settt
418
419real errg = 1, umaxg;
420{
421 real umax = u[].max, umaxg;
422 real[int] aa = [umax], bb(1);
423 mpiAllReduce(aa, bb, comm, mpiMAX);
424 errg = bb[0];
425 if (ipart == 0)
426 cout << "umax global = " << bb[0] << " Wtime = " << (ttt[ittt-1]-ttt[ittt-2]) << " s " << " " << kiter << endl;
427}
428
429if (sff != ""){
430 ofstream ff(sff+".txt", append);
431 cout << " ++++ ";
432 cout << mpirank << "/" << mpisize << " k=" << ksplit << " n= " << nloc << " " << sizeoverlaps << " it= " << kiter;
433 for (int i = 1; i < ittt; ++i)
434 cout << " " << ttt[i]-ttt[i-1] << " ";
435 cout << epss << " " << Ai.nbcoef << " " << Ai.n << endl;
436
437 /*
438 1 mpirank
439 2 mpisize
440 3 ksplit
441 4 nloc
442 5 sizeoverlaps
443 6 kiter
444 7 mesh & part build
445 8 build the partion
446 9 build mesh, transfere , and the fine mesh ..
447 10 build the matrix, the trans matrix, factorizatioon
448 11 GMRES
449 */
450
451 ff << mpirank << " " << mpisize << " " << sPk << " ";
452 ff << ksplit << " " << nloc << " " << sizeoverlaps << " " << kiter;
453 for (int i = 1; i < ittt; ++i)
454 ff << " " << ttt[i]-ttt[i-1] << " ";
455 ff << epss << " " << Ai.nbcoef << " " << Ai.n << " " << gmres << endl;
456}
MPI-GMRES 3D
Todo
todo
Direct solvers
1load "MUMPS_FreeFem"
2//default solver: real-> MUMPS, complex -> MUMPS
3load "real_SuperLU_DIST_FreeFem"
4default solver: real-> SuperLU_DIST, complex -> MUMPS
5load "real_pastix_FreeFem"
6//default solver: real-> pastix, complex -> MUMPS
7
8// Solving with pastix
9{
10 matrix A =
11 [[1, 2, 2, 1, 1],
12 [ 2, 12, 0, 10 , 10],
13 [ 2, 0, 1, 0, 2],
14 [ 1, 10, 0, 22, 0.],
15 [ 1, 10, 2, 0., 22]];
16
17 real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
18 b = A*xx;
19 cout << "b = " << b << endl;
20 cout << "xx = " << xx << endl;
21
22 set(A, solver=sparsesolver, datafilename="ffpastix_iparm_dparm.txt");
23 cout << "solve" << endl;
24 x = A^-1*b;
25 cout << "b = " << b << endl;
26 cout << "x = " << endl;
27 cout << x << endl;
28 di = xx - x;
29 if (mpirank == 0){
30 cout << "x-xx = " << endl;
31 cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
32 }
33}
34
35// Solving with SuperLU_DIST
36realdefaulttoSuperLUdist();
37//default solver: real-> SuperLU_DIST, complex -> MUMPS
38{
39 matrix A =
40 [[1, 2, 2, 1, 1],
41 [ 2, 12, 0, 10 , 10],
42 [ 2, 0, 1, 0, 2],
43 [ 1, 10, 0, 22, 0.],
44 [ 1, 10, 2, 0., 22]];
45
46 real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
47 b = A*xx;
48 cout << "b = " << b << endl;
49 cout << "xx = " << xx << endl;
50
51 set(A, solver=sparsesolver, datafilename="ffsuperlu_dist_fileparam.txt");
52 cout << "solve" << endl;
53 x = A^-1*b;
54 cout << "b = " << b << endl;
55 cout << "x = " << endl;
56 cout << x << endl;
57 di = xx - x;
58 if (mpirank == 0){
59 cout << "x-xx = " << endl;
60 cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
61 }
62}
63
64// Solving with MUMPS
65defaulttoMUMPS();
66//default solver: real-> MUMPS, complex -> MUMPS
67{
68 matrix A =
69 [[1, 2, 2, 1, 1],
70 [ 2, 12, 0, 10 , 10],
71 [ 2, 0, 1, 0, 2],
72 [ 1, 10, 0, 22, 0.],
73 [ 1, 10, 2, 0., 22]];
74
75 real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
76 b = A*xx;
77 cout << "b = " << b << endl;
78 cout << "xx = " << xx << endl;
79
80 set(A, solver=sparsesolver, datafilename="ffmumps_fileparam.txt");
81 cout << "solving solution" << endl;
82 x = A^-1*b;
83 cout << "b = " << b << endl;
84 cout << "x = " << endl;
85 cout << x << endl;
86 di = xx - x;
87 if (mpirank == 0){
88 cout << "x-xx = " << endl;
89 cout << "Linf = " << di.linfty << ", L2 " << di.l2 << endl;
90 }
91}
Solver MUMPS
1load "MUMPS_FreeFem"
2
3// Parameters
4int[int] ICNTL(40); //declaration of ICNTL parameter for MUMPS
5
6//get value of ICNTL from file
7if (mpirank == 0){
8 ifstream ff("ffmumps_fileparam.txt");
9 string line;
10 getline(ff, line);
11 getline(ff, line);
12 for (int iii = 0; iii < 40; iii++){
13 ff >> ICNTL[iii];
14 getline(ff, line);
15 }
16}
17
18broadcast(processor(0), ICNTL);
19
20// Given data of MUMPS solver in array lparams(SYM, PAR, ICNTL)
21// There is no symmetric storage for a matrix associated with a sparse solver.
22// Therefore, the matrix will be considered unsymmetric for parallel sparse solver even if symmetric.
23{
24 // Problem
25 int SYM = 0;
26 int PAR = 1;
27 matrix A =
28 [
29 [40, 0, 45, 0, 0],
30 [0, 12, 0, 0, 0],
31 [0, 0, 40, 0, 0],
32 [12, 0, 0, 22, 0],
33 [0, 0, 20, 0, 22]
34 ];
35
36 // Construction of integer parameter for MUMPS
37 int[int] MumpsLParams(42);
38 MumpsLParams[0] = SYM;
39 MumpsLParams[1] = PAR;
40 for (int ii = 0; ii < 40; ii++)
41 MumpsLParams[ii+2] = ICNTL[ii]; //ICNTL begin with index 0 here
42
43 real[int] xx = [1, 32, 45, 7, 2], x(5), b(5), di(5);
44 b = A*xx;
45 if (mpirank == 0)
46 cout << "xx = " << xx << endl;
47
48 set(A, solver=sparsesolver, lparams=MumpsLParams); //we take the default value for CNTL MUMPS parameter
49
50 // Solve
51 if (mpirank == 0)
52 cout << "Solve" << endl;
53 x = A^-1*b;
54 if (mpirank == 0)
55 cout << "b = " << b << endl;
56 if (mpirank == 0)
57 cout << "x = " << endl; cout << x << endl;
58 di = xx-x;
59 if (mpirank == 0){
60 cout << "x-xx = " << endl;
61 cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
62 }
63}
64
65// Read parameter of MUMPS solver in file ffmumps_fileparam.txt
66{
67 // Problem
68 matrix A =
69 [
70 [40, 0, 45, 0, 0],
71 [0, 12, 0, 0 , 0],
72 [0, 0, 40, 0, 0],
73 [12, 0, 0, 22, 0],
74 [0, 0, 20, 0, 22]
75 ];
76
77 real[int] xx = [1, 32, 45, 7000, 2], x(5), b(5), di(5);
78 b = A*xx;
79 if (mpirank == 0){
80 cout << "b = " << b << endl;
81 cout << "xx = " << xx << endl;
82 }
83
84 set(A, solver=sparsesolver, datafilename="ffmumps_fileparam.txt");
85
86 // Solve
87 if (mpirank == 0)
88 cout << "Solve" << endl;
89 x = A^-1*b;
90
91 if (mpirank == 0){
92 cout << "b = " << b << endl;
93 cout << "x = " << x << endl;
94 }
95 di = xx-x;
96 if (mpirank == 0){
97 cout << "x-xx = " << endl;
98 cout << "Linf = " << di.linfty << ", L2 = " << di.l2 << endl;
99 }
100}
Solver superLU_DIST
Todo
write code (SuperLU_DIST seems to have a bug)
Solver PaStiX
Todo
write code (PaStiX seems to have a bug)