MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
grad.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Internal header, included only by .cpp files.
13// Template function implementations.
14
15#include "../quadinterpolator.hpp"
16#include "../../general/forall.hpp"
17#include "../../linalg/dtensor.hpp"
18#include "../../fem/kernels.hpp"
19#include "../../linalg/kernels.hpp"
20
21namespace mfem
22{
23
24namespace internal
25{
26
27namespace quadrature_interpolator
28{
29
30template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS>
31static void Derivatives1D(const int NE,
32 const real_t *g_,
33 const real_t *j_,
34 const real_t *x_,
35 real_t *y_,
36 const int sdim,
37 const int vdim,
38 const int d1d,
39 const int q1d)
40{
41 const auto g = Reshape(g_, q1d, d1d);
42 const auto j = Reshape(j_, q1d, sdim, NE);
43 const auto x = Reshape(x_, d1d, vdim, NE);
44 auto y = Q_LAYOUT == QVectorLayout::byNODES ?
45 Reshape(y_, q1d, vdim, sdim, NE):
46 Reshape(y_, vdim, sdim, q1d, NE);
47
48 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
49 {
50 for (int c = 0; c < vdim; c++)
51 {
52 for (int q = 0; q < q1d; q++)
53 {
54 real_t du[3] = {0.0, 0.0, 0.0};
55 for (int d = 0; d < d1d; d++)
56 {
57 du[0] += g(q, d) * x(d, c, e);
58 }
59 if (GRAD_PHYS)
60 {
61 if (sdim == 1) { du[0] /= j(q, 0, e); }
62 else if (sdim == 2)
63 {
64 const real_t Jloc[2] = {j(q,0,e), j(q,1,e)};
65 real_t Jinv[3];
67 const real_t U = Jinv[0]*du[0];
68 const real_t V = Jinv[1]*du[0];
69 du[0] = U;
70 du[1] = V;
71 }
72 else // sdim == 3
73 {
74 const real_t Jloc[3] = {j(q,0,e), j(q,1,e), j(q,2,e)};
75 real_t Jinv[3];
77 const real_t U = Jinv[0]*du[0];
78 const real_t V = Jinv[1]*du[0];
79 const real_t W = Jinv[2]*du[0];
80 du[0] = U;
81 du[1] = V;
82 du[2] = W;
83 }
84 }
85 for (int d = 0; d < sdim; ++d)
86 {
87 if (Q_LAYOUT == QVectorLayout::byVDIM) { y(c, d, q, e) = du[d]; }
88 if (Q_LAYOUT == QVectorLayout::byNODES) { y(q, c, d, e) = du[d]; }
89 }
90 }
91 }
92 });
93}
94
95// Template compute kernel for derivatives in 2D: tensor product version.
96template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
97 int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0,
98 int T_NBZ = 1>
99static void Derivatives2D(const int NE,
100 const real_t *b_,
101 const real_t *g_,
102 const real_t *j_,
103 const real_t *x_,
104 real_t *y_,
105 const int sdim = 2,
106 const int vdim = 0,
107 const int d1d = 0,
108 const int q1d = 0)
109{
110 const int D1D = T_D1D ? T_D1D : d1d;
111 const int Q1D = T_Q1D ? T_Q1D : q1d;
112 const int VDIM = T_VDIM ? T_VDIM : vdim;
113 const int SDIM = GRAD_PHYS ? sdim : 2;
114 static constexpr int NBZ = T_NBZ ? T_NBZ : 1;
115
116 const auto b = Reshape(b_, Q1D, D1D);
117 const auto g = Reshape(g_, Q1D, D1D);
118 const auto j = Reshape(j_, Q1D, Q1D, SDIM, 2, NE);
119 const auto x = Reshape(x_, D1D, D1D, VDIM, NE);
120 auto y = Q_LAYOUT == QVectorLayout:: byNODES ?
121 Reshape(y_, Q1D, Q1D, VDIM, SDIM, NE):
122 Reshape(y_, VDIM, SDIM, Q1D, Q1D, NE);
123
124 mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
125 {
126 const int D1D = T_D1D ? T_D1D : d1d;
127 const int Q1D = T_Q1D ? T_Q1D : q1d;
128 const int VDIM = T_VDIM ? T_VDIM : vdim;
129 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
130 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
131
132 const int tidz = MFEM_THREAD_ID(z);
133 MFEM_SHARED real_t BG[2][MQ1*MD1];
134 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
135 DeviceMatrix B(BG[0], D1D, Q1D);
136 DeviceMatrix G(BG[1], D1D, Q1D);
137
138 MFEM_SHARED real_t XY[NBZ][MD1*MD1];
139 DeviceTensor<2> X((real_t*)(XY+tidz), D1D, D1D);
140
141 MFEM_SHARED real_t s_DQ[2][NBZ][MD1*MQ1];
142 DeviceTensor<2> DQ0(s_DQ[0][tidz], D1D, Q1D);
143 DeviceTensor<2> DQ1(s_DQ[1][tidz], D1D, Q1D);
144
145 for (int c = 0; c < VDIM; ++c)
146 {
147 kernels::internal::LoadX<MD1,NBZ>(e,D1D,c,x,XY);
148 MFEM_FOREACH_THREAD(dy,y,D1D)
149 {
150 MFEM_FOREACH_THREAD(qx,x,Q1D)
151 {
152 real_t u = 0.0;
153 real_t v = 0.0;
154 for (int dx = 0; dx < D1D; ++dx)
155 {
156 const real_t input = X(dx,dy);
157 u += input * B(dx,qx);
158 v += input * G(dx,qx);
159 }
160 DQ0(dy,qx) = u;
161 DQ1(dy,qx) = v;
162 }
163 }
164 MFEM_SYNC_THREAD;
165 MFEM_FOREACH_THREAD(qy,y,Q1D)
166 {
167 MFEM_FOREACH_THREAD(qx,x,Q1D)
168 {
169 real_t du[3] = {0.0, 0.0, 0.0};
170 for (int dy = 0; dy < D1D; ++dy)
171 {
172 du[0] += DQ1(dy,qx) * B(dy,qy);
173 du[1] += DQ0(dy,qx) * G(dy,qy);
174 }
175 if (GRAD_PHYS)
176 {
177 if (SDIM == 2)
178 {
179 real_t Jloc[4], Jinv[4];
180 Jloc[0] = j(qx,qy,0,0,e);
181 Jloc[1] = j(qx,qy,1,0,e);
182 Jloc[2] = j(qx,qy,0,1,e);
183 Jloc[3] = j(qx,qy,1,1,e);
184 kernels::CalcInverse<2>(Jloc, Jinv);
185 const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
186 const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
187 du[0] = U;
188 du[1] = V;
189 }
190 else
191 {
192 real_t Jloc[6], Jinv[6];
193 Jloc[0] = j(qx,qy,0,0,e);
194 Jloc[1] = j(qx,qy,1,0,e);
195 Jloc[2] = j(qx,qy,2,0,e);
196 Jloc[3] = j(qx,qy,0,1,e);
197 Jloc[4] = j(qx,qy,1,1,e);
198 Jloc[5] = j(qx,qy,2,1,e);
200 const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
201 const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
202 const real_t W = Jinv[4]*du[0] + Jinv[5]*du[1];
203 du[0] = U;
204 du[1] = V;
205 du[2] = W;
206 }
207 }
208 for (int d = 0; d < SDIM; ++d)
209 {
210 if (Q_LAYOUT == QVectorLayout::byVDIM)
211 {
212 y(c,d,qx,qy,e) = du[d];
213 }
214 else // Q_LAYOUT == QVectorLayout::byNODES
215 {
216 y(qx,qy,c,d,e) = du[d];
217 }
218 }
219 }
220 }
221 MFEM_SYNC_THREAD;
222 }
223 });
224}
225
226// Template compute kernel for derivatives in 3D: tensor product version.
227template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
228 int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0>
229static void Derivatives3D(const int NE,
230 const real_t *b_,
231 const real_t *g_,
232 const real_t *j_,
233 const real_t *x_,
234 real_t *y_,
235 const int vdim = 0,
236 const int d1d = 0,
237 const int q1d = 0)
238{
239 const int D1D = T_D1D ? T_D1D : d1d;
240 const int Q1D = T_Q1D ? T_Q1D : q1d;
241 const int VDIM = T_VDIM ? T_VDIM : vdim;
242
243 const auto b = Reshape(b_, Q1D, D1D);
244 const auto g = Reshape(g_, Q1D, D1D);
245 const auto j = Reshape(j_, Q1D, Q1D, Q1D, 3, 3, NE);
246 const auto x = Reshape(x_, D1D, D1D, D1D, VDIM, NE);
247 auto y = Q_LAYOUT == QVectorLayout:: byNODES ?
248 Reshape(y_, Q1D, Q1D, Q1D, VDIM, 3, NE):
249 Reshape(y_, VDIM, 3, Q1D, Q1D, Q1D, NE);
250
251 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
252 {
253 const int D1D = T_D1D ? T_D1D : d1d;
254 const int Q1D = T_Q1D ? T_Q1D : q1d;
255 const int VDIM = T_VDIM ? T_VDIM : vdim;
256 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_INTERP_1D;
257 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_INTERP_1D;
258
259 MFEM_SHARED real_t BG[2][MQ1*MD1];
260 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
261 DeviceMatrix B(BG[0], D1D, Q1D);
262 DeviceMatrix G(BG[1], D1D, Q1D);
263
264 MFEM_SHARED real_t sm0[3][MQ1*MQ1*MQ1];
265 MFEM_SHARED real_t sm1[3][MQ1*MQ1*MQ1];
266 DeviceTensor<3> X(sm0[2], D1D, D1D, D1D);
267 DeviceTensor<3> DDQ0(sm0[0], D1D, D1D, Q1D);
268 DeviceTensor<3> DDQ1(sm0[1], D1D, D1D, Q1D);
269 DeviceTensor<3> DQQ0(sm1[0], D1D, Q1D, Q1D);
270 DeviceTensor<3> DQQ1(sm1[1], D1D, Q1D, Q1D);
271 DeviceTensor<3> DQQ2(sm1[2], D1D, Q1D, Q1D);
272
273 for (int c = 0; c < VDIM; ++c)
274 {
275 kernels::internal::LoadX(e,D1D,c,x,X);
276 MFEM_FOREACH_THREAD(dz,z,D1D)
277 {
278 MFEM_FOREACH_THREAD(dy,y,D1D)
279 {
280 MFEM_FOREACH_THREAD(qx,x,Q1D)
281 {
282 real_t u = 0.0;
283 real_t v = 0.0;
284 for (int dx = 0; dx < D1D; ++dx)
285 {
286 const real_t input = X(dx,dy,dz);
287 u += input * B(dx,qx);
288 v += input * G(dx,qx);
289 }
290 DDQ0(dz,dy,qx) = u;
291 DDQ1(dz,dy,qx) = v;
292 }
293 }
294 }
295 MFEM_SYNC_THREAD;
296 MFEM_FOREACH_THREAD(dz,z,D1D)
297 {
298 MFEM_FOREACH_THREAD(qy,y,Q1D)
299 {
300 MFEM_FOREACH_THREAD(qx,x,Q1D)
301 {
302 real_t u = 0.0;
303 real_t v = 0.0;
304 real_t w = 0.0;
305 for (int dy = 0; dy < D1D; ++dy)
306 {
307 u += DDQ1(dz,dy,qx) * B(dy,qy);
308 v += DDQ0(dz,dy,qx) * G(dy,qy);
309 w += DDQ0(dz,dy,qx) * B(dy,qy);
310 }
311 DQQ0(dz,qy,qx) = u;
312 DQQ1(dz,qy,qx) = v;
313 DQQ2(dz,qy,qx) = w;
314 }
315 }
316 }
317 MFEM_SYNC_THREAD;
318 MFEM_FOREACH_THREAD(qz,z,Q1D)
319 {
320 MFEM_FOREACH_THREAD(qy,y,Q1D)
321 {
322 MFEM_FOREACH_THREAD(qx,x,Q1D)
323 {
324 real_t u = 0.0;
325 real_t v = 0.0;
326 real_t w = 0.0;
327 for (int dz = 0; dz < D1D; ++dz)
328 {
329 u += DQQ0(dz,qy,qx) * B(dz,qz);
330 v += DQQ1(dz,qy,qx) * B(dz,qz);
331 w += DQQ2(dz,qy,qx) * G(dz,qz);
332 }
333 if (GRAD_PHYS)
334 {
335 real_t Jloc[9], Jinv[9];
336 for (int col = 0; col < 3; col++)
337 {
338 for (int row = 0; row < 3; row++)
339 {
340 Jloc[row+3*col] = j(qx,qy,qz,row,col,e);
341 }
342 }
343 kernels::CalcInverse<3>(Jloc, Jinv);
344 const real_t U = Jinv[0]*u + Jinv[1]*v + Jinv[2]*w;
345 const real_t V = Jinv[3]*u + Jinv[4]*v + Jinv[5]*w;
346 const real_t W = Jinv[6]*u + Jinv[7]*v + Jinv[8]*w;
347 u = U; v = V; w = W;
348 }
349 if (Q_LAYOUT == QVectorLayout::byVDIM)
350 {
351 y(c,0,qx,qy,qz,e) = u;
352 y(c,1,qx,qy,qz,e) = v;
353 y(c,2,qx,qy,qz,e) = w;
354 }
355 if (Q_LAYOUT == QVectorLayout::byNODES)
356 {
357 y(qx,qy,qz,c,0,e) = u;
358 y(qx,qy,qz,c,1,e) = v;
359 y(qx,qy,qz,c,2,e) = w;
360 }
361 }
362 }
363 }
364 MFEM_SYNC_THREAD;
365 }
366 });
367}
368
369} // namespace quadrature_interpolator
370
371} // namespace internal
372
373} // namespace mfem
real_t b
Definition: lissajous.cpp:42
constexpr int SDIM
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
Definition: kernels.hpp:1072
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
Definition: kernels.hpp:1089
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
Definition: kernels.hpp:1080
real_t u(const Vector &xvec)
Definition: lor_mms.hpp:22
DeviceTensor< 2, real_t > DeviceMatrix
Definition: dtensor.hpp:143
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
Definition: forall.hpp:769
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:775
QVectorLayout
Type describing possible layouts for Q-vectors.
Definition: fespace.hpp:53
@ byNODES
NQPT x VDIM x NE (values) / NQPT x VDIM x DIM x NE (grads)
@ byVDIM
VDIM x NQPT x NE (values) / VDIM x DIM x NQPT x NE (grads)
float real_t
Definition: config.hpp:43
void forall(int N, lambda &&body)
Definition: forall.hpp:754