MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
det.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "../quadinterpolator.hpp"
13#include "../../general/forall.hpp"
14#include "../../linalg/dtensor.hpp"
15#include "../../fem/kernels.hpp"
16#include "../../linalg/kernels.hpp"
17
18using namespace mfem;
19
20namespace mfem
21{
22
23namespace internal
24{
25
26namespace quadrature_interpolator
27{
28
29static void Det1D(const int NE,
30 const real_t *g,
31 const real_t *x,
32 real_t *y,
33 const int d1d,
34 const int q1d)
35{
36 const auto G = Reshape(g, q1d, d1d);
37 const auto X = Reshape(x, d1d, NE);
38
39 auto Y = Reshape(y, q1d, NE);
40
41 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
42 {
43 for (int q = 0; q < q1d; q++)
44 {
45 real_t u = 0.0;
46 for (int d = 0; d < d1d; d++)
47 {
48 u += G(q, d) * X(d, e);
49 }
50 Y(q, e) = u;
51 }
52 });
53}
54
55template<int T_D1D = 0, int T_Q1D = 0>
56static void Det2D(const int NE,
57 const real_t *b,
58 const real_t *g,
59 const real_t *x,
60 real_t *y,
61 const int d1d = 0,
62 const int q1d = 0)
63{
64 static constexpr int SDIM = 2;
65 static constexpr int NBZ = 1;
66
67 const int D1D = T_D1D ? T_D1D : d1d;
68 const int Q1D = T_Q1D ? T_Q1D : q1d;
69
70 const auto B = Reshape(b, Q1D, D1D);
71 const auto G = Reshape(g, Q1D, D1D);
72 const auto X = Reshape(x, D1D, D1D, SDIM, NE);
73 auto Y = Reshape(y, Q1D, Q1D, NE);
74
75 mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
76 {
77 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
78 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
79 const int D1D = T_D1D ? T_D1D : d1d;
80 const int Q1D = T_Q1D ? T_Q1D : q1d;
81
82 MFEM_SHARED real_t BG[2][MQ1*MD1];
83 MFEM_SHARED real_t XY[SDIM][NBZ][MD1*MD1];
84 MFEM_SHARED real_t DQ[2*SDIM][NBZ][MD1*MQ1];
85 MFEM_SHARED real_t QQ[2*SDIM][NBZ][MQ1*MQ1];
86
87 kernels::internal::LoadX<MD1,NBZ>(e,D1D,X,XY);
88 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
89
90 kernels::internal::GradX<MD1,MQ1,NBZ>(D1D,Q1D,BG,XY,DQ);
91 kernels::internal::GradY<MD1,MQ1,NBZ>(D1D,Q1D,BG,DQ,QQ);
92
93 MFEM_FOREACH_THREAD(qy,y,Q1D)
94 {
95 MFEM_FOREACH_THREAD(qx,x,Q1D)
96 {
97 real_t J[4];
98 kernels::internal::PullGrad<MQ1,NBZ>(Q1D,qx,qy,QQ,J);
99 Y(qx,qy,e) = kernels::Det<2>(J);
100 }
101 }
102 });
103}
104
105template<int T_D1D = 0, int T_Q1D = 0>
106static void Det2DSurface(const int NE,
107 const real_t *b,
108 const real_t *g,
109 const real_t *x,
110 real_t *y,
111 const int d1d = 0,
112 const int q1d = 0)
113{
114 static constexpr int SDIM = 3;
115 static constexpr int NBZ = 1;
116
117 const int D1D = T_D1D ? T_D1D : d1d;
118 const int Q1D = T_Q1D ? T_Q1D : q1d;
119
120 const auto B = Reshape(b, Q1D, D1D);
121 const auto G = Reshape(g, Q1D, D1D);
122 const auto X = Reshape(x, D1D, D1D, SDIM, NE);
123 auto Y = Reshape(y, Q1D, Q1D, NE);
124
125 mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
126 {
127 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
128 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
129 const int D1D = T_D1D ? T_D1D : d1d;
130 const int Q1D = T_Q1D ? T_Q1D : q1d;
131 const int tidz = MFEM_THREAD_ID(z);
132
133 MFEM_SHARED real_t BG[2][MQ1*MD1];
134 MFEM_SHARED real_t XYZ[SDIM][NBZ][MD1*MD1];
135 MFEM_SHARED real_t DQ[2*SDIM][NBZ][MD1*MQ1];
136
137 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
138
139 // Load XYZ components
140 MFEM_FOREACH_THREAD(dy,y,D1D)
141 {
142 MFEM_FOREACH_THREAD(dx,x,D1D)
143 {
144 for (int d = 0; d < SDIM; ++d)
145 {
146 XYZ[d][tidz][dx + dy*D1D] = X(dx,dy,d,e);
147 }
148 }
149 }
150 MFEM_SYNC_THREAD;
151
152 ConstDeviceMatrix B_mat(BG[0], D1D, Q1D);
153 ConstDeviceMatrix G_mat(BG[1], D1D, Q1D);
154
155 // x contraction
156 MFEM_FOREACH_THREAD(dy,y,D1D)
157 {
158 MFEM_FOREACH_THREAD(qx,x,Q1D)
159 {
160 for (int d = 0; d < SDIM; ++d)
161 {
162 real_t u = 0.0;
163 real_t v = 0.0;
164 for (int dx = 0; dx < D1D; ++dx)
165 {
166 const real_t xval = XYZ[d][tidz][dx + dy*D1D];
167 u += xval * G_mat(dx,qx);
168 v += xval * B_mat(dx,qx);
169 }
170 DQ[d][tidz][dy + qx*D1D] = u;
171 DQ[3 + d][tidz][dy + qx*D1D] = v;
172 }
173 }
174 }
175 MFEM_SYNC_THREAD;
176 // y contraction and determinant computation
177 MFEM_FOREACH_THREAD(qy,y,Q1D)
178 {
179 MFEM_FOREACH_THREAD(qx,x,Q1D)
180 {
181 real_t J_[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
182 for (int d = 0; d < SDIM; ++d)
183 {
184 for (int dy = 0; dy < D1D; ++dy)
185 {
186 J_[d] += DQ[d][tidz][dy + qx*D1D] * B_mat(dy,qy);
187 J_[3 + d] += DQ[3 + d][tidz][dy + qx*D1D] * G_mat(dy,qy);
188 }
189 }
190 DeviceTensor<2> J(J_, 3, 2);
191 const real_t E = J(0,0)*J(0,0) + J(1,0)*J(1,0) + J(2,0)*J(2,0);
192 const real_t F = J(0,0)*J(0,1) + J(1,0)*J(1,1) + J(2,0)*J(2,1);
193 const real_t G = J(0,1)*J(0,1) + J(1,1)*J(1,1) + J(2,1)*J(2,1);
194 Y(qx,qy,e) = sqrt(E*G - F*F);
195 }
196 }
197 });
198}
199
200template<int T_D1D = 0, int T_Q1D = 0, bool SMEM = true>
201static void Det3D(const int NE,
202 const real_t *b,
203 const real_t *g,
204 const real_t *x,
205 real_t *y,
206 const int d1d = 0,
207 const int q1d = 0,
208 Vector *d_buff = nullptr) // used only with SMEM = false
209{
210 constexpr int DIM = 3;
211 static constexpr int GRID = SMEM ? 0 : 128;
212
213 const int D1D = T_D1D ? T_D1D : d1d;
214 const int Q1D = T_Q1D ? T_Q1D : q1d;
215
216 const auto B = Reshape(b, Q1D, D1D);
217 const auto G = Reshape(g, Q1D, D1D);
218 const auto X = Reshape(x, D1D, D1D, D1D, DIM, NE);
219 auto Y = Reshape(y, Q1D, Q1D, Q1D, NE);
220
221 real_t *GM = nullptr;
222 if (!SMEM)
223 {
225 const int max_q1d = T_Q1D ? T_Q1D : limits.MAX_D1D;
226 const int max_d1d = T_D1D ? T_D1D : limits.MAX_Q1D;
227 const int max_qd = std::max(max_q1d, max_d1d);
228 const int mem_size = max_qd * max_qd * max_qd * 9;
229 d_buff->SetSize(2*mem_size*GRID);
230 GM = d_buff->Write();
231 }
232
233 mfem::forall_3D_grid(NE, Q1D, Q1D, Q1D, GRID, [=] MFEM_HOST_DEVICE (int e)
234 {
235 static constexpr int MQ1 = T_Q1D ? T_Q1D :
236 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_D1D);
237 static constexpr int MD1 = T_D1D ? T_D1D :
238 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_Q1D);
239 static constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
240 static constexpr int MSZ = MDQ * MDQ * MDQ * 9;
241
242 const int bid = MFEM_BLOCK_ID(x);
243 MFEM_SHARED real_t BG[2][MQ1*MD1];
244 MFEM_SHARED real_t SM0[SMEM?MSZ:1];
245 MFEM_SHARED real_t SM1[SMEM?MSZ:1];
246 real_t *lm0 = SMEM ? SM0 : GM + MSZ*bid;
247 real_t *lm1 = SMEM ? SM1 : GM + MSZ*(GRID+bid);
248 real_t (*DDD)[MD1*MD1*MD1] = (real_t (*)[MD1*MD1*MD1]) (lm0);
249 real_t (*DDQ)[MD1*MD1*MQ1] = (real_t (*)[MD1*MD1*MQ1]) (lm1);
250 real_t (*DQQ)[MD1*MQ1*MQ1] = (real_t (*)[MD1*MQ1*MQ1]) (lm0);
251 real_t (*QQQ)[MQ1*MQ1*MQ1] = (real_t (*)[MQ1*MQ1*MQ1]) (lm1);
252
253 kernels::internal::LoadX<MD1>(e,D1D,X,DDD);
254 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
255
256 kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,BG,DDD,DDQ);
257 kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,BG,DDQ,DQQ);
258 kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,BG,DQQ,QQQ);
259
260 MFEM_FOREACH_THREAD(qz,z,Q1D)
261 {
262 MFEM_FOREACH_THREAD(qy,y,Q1D)
263 {
264 MFEM_FOREACH_THREAD(qx,x,Q1D)
265 {
266 real_t J[9];
267 kernels::internal::PullGrad<MQ1>(Q1D, qx,qy,qz, QQQ, J);
268 Y(qx,qy,qz,e) = kernels::Det<3>(J);
269 }
270 }
271 }
272 });
273}
274
275// Tensor-product evaluation of quadrature point determinants: dispatch
276// function.
277void TensorDeterminants(const int NE,
278 const int vdim,
279 const DofToQuad &maps,
280 const Vector &e_vec,
281 Vector &q_det,
282 Vector &d_buff)
283{
284 if (NE == 0) { return; }
285 const int dim = maps.FE->GetDim();
286 const int D1D = maps.ndof;
287 const int Q1D = maps.nqpt;
288 const real_t *B = maps.B.Read();
289 const real_t *G = maps.G.Read();
290 const real_t *X = e_vec.Read();
291 real_t *Y = q_det.Write();
292
293 const int id = (vdim<<8) | (D1D<<4) | Q1D;
294
295 if (dim == 1)
296 {
297 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D,
298 "Orders higher than " << DeviceDofQuadLimits::Get().MAX_D1D-1
299 << " are not supported!");
300 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D,
301 "Quadrature rules with more than "
302 << DeviceDofQuadLimits::Get().MAX_Q1D << " 1D points are not supported!");
303 Det1D(NE, G, X, Y, D1D, Q1D);
304 return;
305 }
306 if (dim == 2)
307 {
308 switch (id)
309 {
310 case 0x222: return Det2D<2,2>(NE,B,G,X,Y);
311 case 0x223: return Det2D<2,3>(NE,B,G,X,Y);
312 case 0x224: return Det2D<2,4>(NE,B,G,X,Y);
313 case 0x226: return Det2D<2,6>(NE,B,G,X,Y);
314 case 0x234: return Det2D<3,4>(NE,B,G,X,Y);
315 case 0x236: return Det2D<3,6>(NE,B,G,X,Y);
316 case 0x244: return Det2D<4,4>(NE,B,G,X,Y);
317 case 0x246: return Det2D<4,6>(NE,B,G,X,Y);
318 case 0x256: return Det2D<5,6>(NE,B,G,X,Y);
319 default:
320 {
321 const int MD = DeviceDofQuadLimits::Get().MAX_D1D;
322 const int MQ = DeviceDofQuadLimits::Get().MAX_Q1D;
323 MFEM_VERIFY(D1D <= MD, "Orders higher than " << MD-1
324 << " are not supported!");
325 MFEM_VERIFY(Q1D <= MQ, "Quadrature rules with more than "
326 << MQ << " 1D points are not supported!");
327 if (vdim == 2) { Det2D(NE,B,G,X,Y,D1D,Q1D); }
328 else if (vdim == 3) { Det2DSurface(NE,B,G,X,Y,D1D,Q1D); }
329 else { MFEM_ABORT("Invalid space dimension."); }
330 return;
331 }
332 }
333 }
334 if (dim == 3)
335 {
336 switch (id)
337 {
338 case 0x324: return Det3D<2,4>(NE,B,G,X,Y);
339 case 0x333: return Det3D<3,3>(NE,B,G,X,Y);
340 case 0x335: return Det3D<3,5>(NE,B,G,X,Y);
341 case 0x336: return Det3D<3,6>(NE,B,G,X,Y);
342 default:
343 {
344 const int MD = DeviceDofQuadLimits::Get().MAX_DET_1D;
345 const int MQ = DeviceDofQuadLimits::Get().MAX_DET_1D;
346 // Highest orders that fit in shared memory
347 if (D1D <= MD && Q1D <= MQ)
348 { return Det3D<0,0,true>(NE,B,G,X,Y,D1D,Q1D); }
349 // Last fall-back will use global memory
350 return Det3D<0,0,false>(
351 NE,B,G,X,Y,D1D,Q1D,&d_buff);
352 }
353 }
354 }
355 MFEM_ABORT("Kernel " << std::hex << id << std::dec << " not supported yet");
356}
357
358} // namespace quadrature_interpolator
359
360} // namespace internal
361
362} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:317
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:82
Structure representing the matrices/tensors needed to evaluate (in reference space) the values,...
Definition: fe_base.hpp:137
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition: fe_base.hpp:210
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition: fe_base.hpp:189
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition: fe_base.hpp:174
const class FiniteElement * FE
The FiniteElement that created and owns this object.
Definition: fe_base.hpp:141
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition: fe_base.hpp:178
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe_base.hpp:316
Vector data type.
Definition: vector.hpp:80
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:474
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:482
int dim
Definition: ex24.cpp:53
real_t b
Definition: lissajous.cpp:42
constexpr int SDIM
constexpr int DIM
real_t u(const Vector &xvec)
Definition: lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
Definition: forall.hpp:769
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:28
float real_t
Definition: config.hpp:43
void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
Definition: forall.hpp:781
MFEM_HOST_DEVICE real_t Det2D(DeviceMatrix &J)
Definition: lor_util.hpp:23
void forall(int N, lambda &&body)
Definition: forall.hpp:754
Maximum number of 1D DOFs or quadrature points for the current runtime configuration of the Device (u...
Definition: forall.hpp:114
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition: forall.hpp:125
int MAX_DET_1D
Maximum number of points for determinant computation in QuadratureInterpolator.
Definition: forall.hpp:122
int MAX_D1D
Maximum number of 1D nodal points.
Definition: forall.hpp:115
int MAX_Q1D
Maximum number of 1D quadrature points.
Definition: forall.hpp:116