MFEM  v3.3
Finite element discretization library
miniapps/performance/ex1p.cpp
Go to the documentation of this file.
1 // MFEM Example 1 - Parallel High-Performance Version
2 //
3 // Compile with: make ex1p
4 //
5 // Sample runs: mpirun -np 4 ex1p -m ../../data/fichera.mesh -perf -mf -pc lor
6 // mpirun -np 4 ex1p -m ../../data/fichera.mesh -perf -asm -pc ho
7 // mpirun -np 4 ex1p -m ../../data/fichera.mesh -perf -asm -pc ho -sc
8 // mpirun -np 4 ex1p -m ../../data/fichera.mesh -std -asm -pc ho
9 // mpirun -np 4 ex1p -m ../../data/fichera.mesh -std -asm -pc ho -sc
10 // mpirun -np 4 ex1p -m ../../data/amr-hex.mesh -perf -asm -pc ho -sc
11 // mpirun -np 4 ex1p -m ../../data/amr-hex.mesh -std -asm -pc ho -sc
12 // mpirun -np 4 ex1p -m ../../data/ball-nurbs.mesh -perf -asm -pc ho -sc
13 // mpirun -np 4 ex1p -m ../../data/ball-nurbs.mesh -std -asm -pc ho -sc
14 // mpirun -np 4 ex1p -m ../../data/pipe-nurbs.mesh -perf -mf -pc lor
15 // mpirun -np 4 ex1p -m ../../data/pipe-nurbs.mesh -std -asm -pc ho -sc
16 //
17 // Description: This example code demonstrates the use of MFEM to define a
18 // simple finite element discretization of the Laplace problem
19 // -Delta u = 1 with homogeneous Dirichlet boundary conditions.
20 // Specifically, we discretize using a FE space of the specified
21 // order, or if order < 1 using an isoparametric/isogeometric
22 // space (i.e. quadratic for quadratic curvilinear mesh, NURBS for
23 // NURBS mesh, etc.)
24 //
25 // The example highlights the use of mesh refinement, finite
26 // element grid functions, as well as linear and bilinear forms
27 // corresponding to the left-hand side and right-hand side of the
28 // discrete linear system. We also cover the explicit elimination
29 // of essential boundary conditions, static condensation, and the
30 // optional connection to the GLVis tool for visualization.
31 
32 #include "mfem-performance.hpp"
33 #include <fstream>
34 #include <iostream>
35 
36 using namespace std;
37 using namespace mfem;
38 
39 // Define template parameters for optimized build.
40 const Geometry::Type geom = Geometry::CUBE; // mesh elements (default: hex)
41 const int mesh_p = 3; // mesh curvature (default: 3)
42 const int sol_p = 3; // solution order (default: 3)
44 const int ir_order = 2*sol_p+rdim-1;
45 
46 // Static mesh type
50 
51 // Static solution finite element space type
54 
55 // Static quadrature, coefficient and integrator types
59 
60 // Static bilinear form type, combining the above types
62 
63 int main(int argc, char *argv[])
64 {
65  // 1. Initialize MPI.
66  int num_procs, myid;
67  MPI_Init(&argc, &argv);
68  MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
69  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
70 
71  // 2. Parse command-line options.
72  const char *mesh_file = "../../data/fichera.mesh";
73  int order = sol_p;
74  const char *basis_type = "G"; // Gauss-Lobatto
75  bool static_cond = false;
76  const char *pc = "lor";
77  bool perf = true;
78  bool matrix_free = true;
79  bool visualization = 1;
80 
81  OptionsParser args(argc, argv);
82  args.AddOption(&mesh_file, "-m", "--mesh",
83  "Mesh file to use.");
84  args.AddOption(&order, "-o", "--order",
85  "Finite element order (polynomial degree) or -1 for"
86  " isoparametric space.");
87  args.AddOption(&basis_type, "-b", "--basis-type",
88  "Basis: G - Gauss-Lobatto, P - Positive, U - Uniform");
89  args.AddOption(&perf, "-perf", "--hpc-version", "-std", "--standard-version",
90  "Enable high-performance, tensor-based, assembly/evaluation.");
91  args.AddOption(&matrix_free, "-mf", "--matrix-free", "-asm", "--assembly",
92  "Use matrix-free evaluation or efficient matrix assembly in "
93  "the high-performance version.");
94  args.AddOption(&pc, "-pc", "--preconditioner",
95  "Preconditioner: lor - low-order-refined (matrix-free) AMG, "
96  "ho - high-order (assembled) AMG, none.");
97  args.AddOption(&static_cond, "-sc", "--static-condensation", "-no-sc",
98  "--no-static-condensation", "Enable static condensation.");
99  args.AddOption(&visualization, "-vis", "--visualization", "-no-vis",
100  "--no-visualization",
101  "Enable or disable GLVis visualization.");
102  args.Parse();
103  if (!args.Good())
104  {
105  if (myid == 0)
106  {
107  args.PrintUsage(cout);
108  }
109  MPI_Finalize();
110  return 1;
111  }
112  if (static_cond && perf && matrix_free)
113  {
114  if (myid == 0)
115  {
116  cout << "\nStatic condensation can not be used with matrix-free"
117  " evaluation!\n" << endl;
118  }
119  MPI_Finalize();
120  return 2;
121  }
122  MFEM_VERIFY(perf || !matrix_free,
123  "--standard-version is not compatible with --matrix-free");
124  if (myid == 0)
125  {
126  args.PrintOptions(cout);
127  }
128 
129  enum PCType { NONE, LOR, HO };
130  PCType pc_choice;
131  if (!strcmp(pc, "ho")) { pc_choice = HO; }
132  else if (!strcmp(pc, "lor")) { pc_choice = LOR; }
133  else if (!strcmp(pc, "none")) { pc_choice = NONE; }
134  else
135  {
136  mfem_error("Invalid Preconditioner specified");
137  return 3;
138  }
139 
140  // See class BasisType in fem/fe_coll.hpp for available basis types
141  int basis = BasisType::GetType(basis_type[0]);
142  if (myid == 0)
143  {
144  cout << "Using " << BasisType::Name(basis) << " basis ..." << endl;
145  }
146 
147  // 3. Read the (serial) mesh from the given mesh file on all processors. We
148  // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface
149  // and volume meshes with the same code.
150  Mesh *mesh = new Mesh(mesh_file, 1, 1);
151  int dim = mesh->Dimension();
152 
153  // 4. Check if the optimized version matches the given mesh
154  if (perf)
155  {
156  if (myid == 0)
157  {
158  cout << "High-performance version using integration rule with "
159  << int_rule_t::qpts << " points ..." << endl;
160  }
161  if (!mesh_t::MatchesGeometry(*mesh))
162  {
163  if (myid == 0)
164  {
165  cout << "The given mesh does not match the optimized 'geom' parameter.\n"
166  << "Recompile with suitable 'geom' value." << endl;
167  }
168  delete mesh;
169  MPI_Finalize();
170  return 4;
171  }
172  else if (!mesh_t::MatchesNodes(*mesh))
173  {
174  if (myid == 0)
175  {
176  cout << "Switching the mesh curvature to match the "
177  << "optimized value (order " << mesh_p << ") ..." << endl;
178  }
179  mesh->SetCurvature(mesh_p, false, -1, Ordering::byNODES);
180  }
181  }
182 
183  // 5. Refine the serial mesh on all processors to increase the resolution. In
184  // this example we do 'ref_levels' of uniform refinement. We choose
185  // 'ref_levels' to be the largest number that gives a final mesh with no
186  // more than 10,000 elements.
187  {
188  int ref_levels =
189  (int)floor(log(10000./mesh->GetNE())/log(2.)/dim);
190  for (int l = 0; l < ref_levels; l++)
191  {
192  mesh->UniformRefinement();
193  }
194  }
195  if (!perf && mesh->NURBSext)
196  {
197  const int new_mesh_p = std::min(sol_p, mesh_p);
198  if (myid == 0)
199  {
200  cout << "NURBS mesh: switching the mesh curvature to be "
201  << "min(sol_p, mesh_p) = " << new_mesh_p << " ..." << endl;
202  }
203  mesh->SetCurvature(new_mesh_p, false, -1, Ordering::byNODES);
204  }
205 
206  // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
207  // this mesh further in parallel to increase the resolution. Once the
208  // parallel mesh is defined, the serial mesh can be deleted.
209  ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
210  delete mesh;
211  {
212  int par_ref_levels = 1;
213  for (int l = 0; l < par_ref_levels; l++)
214  {
215  pmesh->UniformRefinement();
216  }
217  }
218  if (pmesh->MeshGenerator() & 1) // simplex mesh
219  {
220  MFEM_VERIFY(pc_choice != LOR, "triangle and tet meshes do not support"
221  " the LOR preconditioner yet");
222  }
223 
224  // 7. Define a parallel finite element space on the parallel mesh. Here we
225  // use continuous Lagrange finite elements of the specified order. If
226  // order < 1, we instead use an isoparametric/isogeometric space.
228  if (order > 0)
229  {
230  fec = new H1_FECollection(order, dim, basis);
231  }
232  else if (pmesh->GetNodes())
233  {
234  fec = pmesh->GetNodes()->OwnFEC();
235  if (myid == 0)
236  {
237  cout << "Using isoparametric FEs: " << fec->Name() << endl;
238  }
239  }
240  else
241  {
242  fec = new H1_FECollection(order = 1, dim, basis);
243  }
244  ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec);
245  HYPRE_Int size = fespace->GlobalTrueVSize();
246  if (myid == 0)
247  {
248  cout << "Number of finite element unknowns: " << size << endl;
249  }
250 
251  ParMesh *pmesh_lor = NULL;
252  FiniteElementCollection *fec_lor = NULL;
253  ParFiniteElementSpace *fespace_lor = NULL;
254  if (pc_choice == LOR)
255  {
256  int basis_lor = basis;
257  if (basis == BasisType::Positive) { basis_lor=BasisType::ClosedUniform; }
258  pmesh_lor = new ParMesh(pmesh, order, basis_lor);
259  fec_lor = new H1_FECollection(1, dim);
260  fespace_lor = new ParFiniteElementSpace(pmesh_lor, fec_lor);
261  }
262 
263  // 8. Check if the optimized version matches the given space
264  if (perf && !sol_fes_t::Matches(*fespace))
265  {
266  if (myid == 0)
267  {
268  cout << "The given order does not match the optimized parameter.\n"
269  << "Recompile with suitable 'sol_p' value." << endl;
270  }
271  delete fespace;
272  delete fec;
273  delete mesh;
274  MPI_Finalize();
275  return 5;
276  }
277 
278  // 9. Determine the list of true (i.e. parallel conforming) essential
279  // boundary dofs. In this example, the boundary conditions are defined
280  // by marking all the boundary attributes from the mesh as essential
281  // (Dirichlet) and converting them to a list of true dofs.
282  Array<int> ess_tdof_list;
283  if (pmesh->bdr_attributes.Size())
284  {
285  Array<int> ess_bdr(pmesh->bdr_attributes.Max());
286  ess_bdr = 1;
287  fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list);
288  }
289 
290  // 10. Set up the parallel linear form b(.) which corresponds to the
291  // right-hand side of the FEM linear system, which in this case is
292  // (1,phi_i) where phi_i are the basis functions in fespace.
293  ParLinearForm *b = new ParLinearForm(fespace);
294  ConstantCoefficient one(1.0);
296  b->Assemble();
297 
298  // 11. Define the solution vector x as a parallel finite element grid
299  // function corresponding to fespace. Initialize x with initial guess of
300  // zero, which satisfies the boundary conditions.
301  ParGridFunction x(fespace);
302  x = 0.0;
303 
304  // 12. Set up the parallel bilinear form a(.,.) on the finite element space
305  // that will hold the matrix corresponding to the Laplacian operator.
306  ParBilinearForm *a = new ParBilinearForm(fespace);
307  ParBilinearForm *a_pc = NULL;
308  if (pc_choice == LOR) { a_pc = new ParBilinearForm(fespace_lor); }
309  if (pc_choice == HO) { a_pc = new ParBilinearForm(fespace); }
310 
311  // 13. Assemble the parallel bilinear form and the corresponding linear
312  // system, applying any necessary transformations such as: parallel
313  // assembly, eliminating boundary conditions, applying conforming
314  // constraints for non-conforming AMR, static condensation, etc.
315  if (static_cond)
316  {
318  MFEM_VERIFY(pc_choice != LOR,
319  "cannot use LOR preconditioner with static condensation");
320  }
321 
322  if (myid == 0)
323  {
324  cout << "Assembling the matrix ..." << flush;
325  }
326  tic_toc.Clear();
327  tic_toc.Start();
328  // Pre-allocate sparsity assuming dense element matrices
330 
331  HPCBilinearForm *a_hpc = NULL;
332  Operator *a_oper = NULL;
333 
334  if (!perf)
335  {
336  // Standard assembly using a diffusion domain integrator
338  a->Assemble();
339  }
340  else
341  {
342  // High-performance assembly/evaluation using the templated operator type
343  a_hpc = new HPCBilinearForm(integ_t(coeff_t(1.0)), *fespace);
344  if (matrix_free)
345  {
346  a_hpc->Assemble(); // partial assembly
347  }
348  else
349  {
350  a_hpc->AssembleBilinearForm(*a); // full matrix assembly
351  }
352  }
353  tic_toc.Stop();
354  if (myid == 0)
355  {
356  cout << " done, " << tic_toc.RealTime() << "s." << endl;
357  }
358 
359  // 14. Define and apply a parallel PCG solver for AX=B with the BoomerAMG
360  // preconditioner from hypre.
361 
362  // Setup the operator matrix (if applicable)
363  HypreParMatrix A;
364  Vector B, X;
365  if (perf && matrix_free)
366  {
367  a_hpc->FormLinearSystem(ess_tdof_list, x, *b, a_oper, X, B);
368  HYPRE_Int glob_size = fespace->GlobalTrueVSize();
369  if (myid == 0)
370  {
371  cout << "Size of linear system: " << glob_size << endl;
372  }
373  }
374  else
375  {
376  a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B);
377  HYPRE_Int glob_size = A.GetGlobalNumRows();
378  if (myid == 0)
379  {
380  cout << "Size of linear system: " << glob_size << endl;
381  }
382  a_oper = &A;
383  }
384 
385  // Setup the matrix used for preconditioning
386  if (myid == 0)
387  {
388  cout << "Assembling the preconditioning matrix ..." << flush;
389  }
390  tic_toc.Clear();
391  tic_toc.Start();
392 
393  HypreParMatrix A_pc;
394  if (pc_choice == LOR)
395  {
396  // TODO: assemble the LOR matrix using the performance code
398  a_pc->UsePrecomputedSparsity();
399  a_pc->Assemble();
400  a_pc->FormSystemMatrix(ess_tdof_list, A_pc);
401  }
402  else if (pc_choice == HO)
403  {
404  if (!matrix_free)
405  {
406  A_pc.MakeRef(A); // matrix already assembled, reuse it
407  }
408  else
409  {
410  a_pc->UsePrecomputedSparsity();
411  a_hpc->AssembleBilinearForm(*a_pc);
412  a_pc->FormSystemMatrix(ess_tdof_list, A_pc);
413  }
414  }
415  tic_toc.Stop();
416  if (myid == 0)
417  {
418  cout << " done, " << tic_toc.RealTime() << "s." << endl;
419  }
420 
421  // Solve with CG or PCG, depending if the matrix A_pc is available
422  CGSolver *pcg;
423  pcg = new CGSolver(MPI_COMM_WORLD);
424  pcg->SetRelTol(1e-6);
425  pcg->SetMaxIter(500);
426  pcg->SetPrintLevel(1);
427 
428  HypreSolver *amg = NULL;
429 
430  pcg->SetOperator(*a_oper);
431  if (pc_choice != NONE)
432  {
433  amg = new HypreBoomerAMG(A_pc);
434  pcg->SetPreconditioner(*amg);
435  }
436 
437  tic_toc.Clear();
438  tic_toc.Start();
439 
440  pcg->Mult(B, X);
441 
442  tic_toc.Stop();
443  delete amg;
444 
445  if (myid == 0)
446  {
447  // Note: In the pcg algorithm, the number of operator Mult() calls is
448  // N_iter and the number of preconditioner Mult() calls is N_iter+1.
449  cout << "Time per CG step: "
450  << tic_toc.RealTime() / pcg->GetNumIterations() << "s." << endl;
451  }
452 
453  // 15. Recover the parallel grid function corresponding to X. This is the
454  // local finite element solution on each processor.
455  if (perf && matrix_free)
456  {
457  a_hpc->RecoverFEMSolution(X, *b, x);
458  }
459  else
460  {
461  a->RecoverFEMSolution(X, *b, x);
462  }
463 
464  // 16. Save the refined mesh and the solution in parallel. This output can
465  // be viewed later using GLVis: "glvis -np <np> -m mesh -g sol".
466  {
467  ostringstream mesh_name, sol_name;
468  mesh_name << "mesh." << setfill('0') << setw(6) << myid;
469  sol_name << "sol." << setfill('0') << setw(6) << myid;
470 
471  ofstream mesh_ofs(mesh_name.str().c_str());
472  mesh_ofs.precision(8);
473  pmesh->Print(mesh_ofs);
474 
475  ofstream sol_ofs(sol_name.str().c_str());
476  sol_ofs.precision(8);
477  x.Save(sol_ofs);
478  }
479 
480  // 17. Send the solution by socket to a GLVis server.
481  if (visualization)
482  {
483  char vishost[] = "localhost";
484  int visport = 19916;
485  socketstream sol_sock(vishost, visport);
486  sol_sock << "parallel " << num_procs << " " << myid << "\n";
487  sol_sock.precision(8);
488  sol_sock << "solution\n" << *pmesh << x << flush;
489  }
490 
491  // 18. Free the used memory.
492  delete a;
493  delete a_hpc;
494  if (a_oper != &A) { delete a_oper; }
495  delete a_pc;
496  delete b;
497  delete fespace;
498  delete fespace_lor;
499  delete fec_lor;
500  delete pmesh_lor;
501  if (order > 0) { delete fec; }
502  delete pmesh;
503  delete pcg;
504 
505  MPI_Finalize();
506 
507  return 0;
508 }
int Size() const
Logical size of the array.
Definition: array.hpp:109
Class for domain integration L(v) := (f, v)
Definition: lininteg.hpp:47
Conjugate gradient method.
Definition: solvers.hpp:111
int GetNumIterations() const
Definition: solvers.hpp:66
Subclass constant coefficient.
Definition: coefficient.hpp:57
void FormLinearSystem(const Array< int > &ess_tdof_list, Vector &x, Vector &b, OperatorHandle &A, Vector &X, Vector &B, int copy_interior=0)
void MakeRef(const HypreParMatrix &master)
Make this HypreParMatrix a reference to 'master'.
Definition: hypre.cpp:766
void Assemble()
Assembles the linear form i.e. sums over all domain/bdr integrators.
Definition: linearform.cpp:42
void AssembleBilinearForm(BilinearForm &a) const
TBilinearForm< mesh_t, sol_fes_t, int_rule_t, integ_t > HPCBilinearForm
H1_FiniteElement< geom, sol_p > sol_fe_t
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:584
virtual void Save(std::ostream &out) const
Definition: pgridfunc.cpp:314
StopWatch tic_toc
Definition: tic_toc.cpp:338
double RealTime()
Definition: tic_toc.cpp:317
Abstract parallel finite element space.
Definition: pfespace.hpp:28
STL namespace.
virtual void Mult(const Vector &x, Vector &y) const
Operator application: y=A(x).
Definition: solvers.cpp:294
TIntegrator< coeff_t, TDiffusionKernel > integ_t
const Geometry::Type geom
The BoomerAMG solver in hypre.
Definition: hypre.hpp:770
Class for parallel linear form.
Definition: plinearform.hpp:26
HYPRE_Int GetGlobalNumRows() const
Definition: hypre.hpp:371
int dim
Definition: ex3.cpp:47
void SetPrintLevel(int print_lvl)
Definition: solvers.cpp:71
void UniformRefinement(int i, const DSTable &, int *, int *, int *)
Definition: mesh.cpp:6684
void SetMaxIter(int max_it)
Definition: solvers.hpp:63
void SetCurvature(int order, bool discont=false, int space_dim=-1, int ordering=1)
Definition: mesh.cpp:3278
int MeshGenerator()
Get the mesh generator/type.
Definition: mesh.hpp:577
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
Definition: array.cpp:108
void Assemble(int skip_zeros=1)
Assemble the local matrix.
int Dimension() const
Definition: mesh.hpp:611
void PrintUsage(std::ostream &out) const
Definition: optparser.cpp:434
H1_FiniteElementSpace< sol_fe_t > sol_fes_t
TMesh< mesh_fes_t > mesh_t
const int ir_order
const int rdim
void AddDomainIntegrator(LinearFormIntegrator *lfi)
Adds new Domain Integrator.
Definition: linearform.cpp:19
Array< int > bdr_attributes
A list of all unique boundary attributes used by the Mesh.
Definition: mesh.hpp:142
void SetRelTol(double rtol)
Definition: solvers.hpp:61
void UsePrecomputedSparsity(int ps=1)
virtual void GetEssentialTrueDofs(const Array< int > &bdr_attr_is_ess, Array< int > &ess_tdof_list)
Definition: pfespace.cpp:521
void AddOption(bool *var, const char *enable_short_name, const char *enable_long_name, const char *disable_short_name, const char *disable_long_name, const char *description, bool required=false)
Definition: optparser.hpp:74
void mfem_error(const char *msg)
Definition: error.cpp:106
void FormLinearSystem(const Array< int > &ess_tdof_list, Vector &x, Vector &b, Operator *&A, Vector &X, Vector &B, int copy_interior=0)
Form a constrained linear system using a matrix-free approach.
Definition: operator.cpp:21
TConstantCoefficient coeff_t
virtual void RecoverFEMSolution(const Vector &X, const Vector &b, Vector &x)
Reconstruct a solution vector x (e.g. a GridFunction) from the solution X of a constrained linear sys...
Definition: operator.cpp:57
virtual void RecoverFEMSolution(const Vector &X, const Vector &b, Vector &x)
NURBSExtension * NURBSext
Optional NURBS mesh extension.
Definition: mesh.hpp:144
void FormSystemMatrix(const Array< int > &ess_tdof_list, OperatorHandle &A)
Form the linear system matrix A, see FormLinearSystem() for details.
virtual const char * Name() const
Definition: fe_coll.hpp:117
H1_FiniteElement< geom, mesh_p > mesh_fe_t
H1_FiniteElementSpace< mesh_fe_t > mesh_fes_t
const int sol_p
void AddDomainIntegrator(BilinearFormIntegrator *bfi)
Adds new Domain Integrator.
void PrintOptions(std::ostream &out) const
Definition: optparser.cpp:304
int main(int argc, char *argv[])
Class for parallel bilinear form.
Abstract class for hypre's solvers and preconditioners.
Definition: hypre.hpp:594
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition: solvers.hpp:125
const int mesh_p
Vector data type.
Definition: vector.hpp:36
void GetNodes(Vector &node_coord) const
Definition: mesh.cpp:5368
virtual void SetPreconditioner(Solver &pr)
This should be called before SetOperator.
Definition: solvers.cpp:92
Arbitrary order H1-conforming (continuous) finite elements.
Definition: fe_coll.hpp:146
TIntegrationRule< geom, ir_order > int_rule_t
Class for parallel grid function.
Definition: pgridfunc.hpp:31
Abstract operator.
Definition: operator.hpp:21
Wrapper for hypre's ParCSR matrix class.
Definition: hypre.hpp:174
Class for parallel meshes.
Definition: pmesh.hpp:28
void EnableStaticCondensation()
virtual void Print(std::ostream &out=std::cout) const
Definition: pmesh.cpp:3395
bool Good() const
Definition: optparser.hpp:120