MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
kernel_helpers.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_ELASTICITY_KERNEL_HELPERS_HPP
13#define MFEM_ELASTICITY_KERNEL_HELPERS_HPP
14
15#include "mfem.hpp"
16#include "general/forall.hpp"
17#include "linalg/tensor.hpp"
18
19using mfem::internal::tensor;
20
21namespace mfem
22{
23
24namespace KernelHelpers
25{
26
27// MFEM_SHARED_3D_BLOCK_TENSOR definition
28// Should be moved in backends/cuda/hip header files.
29#if defined(__CUDA_ARCH__)
30#define MFEM_SHARED_3D_BLOCK_TENSOR(name,T,bx,by,bz,...)\
31MFEM_SHARED tensor<T,bx,by,bz,__VA_ARGS__> name;\
32name(threadIdx.x, threadIdx.y, threadIdx.z) = tensor<T,__VA_ARGS__> {};
33#else
34#define MFEM_SHARED_3D_BLOCK_TENSOR(name,...) tensor<__VA_ARGS__> name {};
35#endif
36
37// Kernel helper functions
38
39/**
40 * @brief Runtime check for memory restrictions that are determined at compile
41 * time.
42 *
43 * @param d1d Number of degrees of freedom in 1D.
44 * @param q1d Number of quadrature points in 1D.
45 */
46inline void CheckMemoryRestriction(int d1d, int q1d)
47{
48 MFEM_VERIFY(d1d <= q1d,
49 "There should be more or equal quadrature points "
50 "as there are dofs");
51 MFEM_VERIFY(d1d <= DeviceDofQuadLimits::Get().MAX_D1D,
52 "Maximum number of degrees of freedom in 1D reached."
53 "This number can be increased globally in general/forall.hpp if "
54 "device memory allows.");
55 MFEM_VERIFY(q1d <= DeviceDofQuadLimits::Get().MAX_Q1D,
56 "Maximum quadrature points 1D reached."
57 "This number can be increased globally in "
58 "general/forall.hpp if device memory allows.");
59}
60
61/**
62 * @brief Multi-component gradient evaluation from DOFs to quadrature points in
63 * reference coordinates.
64 *
65 * The implementation exploits sum factorization.
66 *
67 * @note DeviceTensor<2> means RANK=2
68 *
69 * @tparam dim Dimension.
70 * @tparam d1d Number of degrees of freedom in 1D.
71 * @tparam q1d er of quadrature points in 1D.
72 * @param B Basis functions evaluated at quadrature points in column-major
73 * layout q1d x d1d.
74 * @param G Gradients of basis functions evaluated at quadrature points in
75 * column major layout q1d x d1d.
76 * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
77 * q1d x q1d x q1d.
78 * @param U Input vector d1d x d1d x d1d x dim.
79 * @param dUdxi Gradient of the input vector wrt to reference coordinates. Size
80 * needed q1d x q1d x q1d x dim x dim.
81 */
82template <int dim, int d1d, int q1d>
83static inline MFEM_HOST_DEVICE void
84CalcGrad(const tensor<real_t, q1d, d1d> &B,
85 const tensor<real_t, q1d, d1d> &G,
86 tensor<real_t,2,3,q1d,q1d,q1d> &smem,
88 tensor<real_t, q1d, q1d, q1d, dim, dim> &dUdxi)
89{
90 for (int c = 0; c < dim; ++c)
91 {
92 MFEM_FOREACH_THREAD(dz,z,d1d)
93 {
94 MFEM_FOREACH_THREAD(dy,y,d1d)
95 {
96 MFEM_FOREACH_THREAD(dx,x,d1d)
97 {
98 smem(0,0,dx,dy,dz) = U(dx,dy,dz,c);
99 }
100 }
101 }
102 MFEM_SYNC_THREAD;
103 MFEM_FOREACH_THREAD(dz,z,d1d)
104 {
105 MFEM_FOREACH_THREAD(dy,y,d1d)
106 {
107 MFEM_FOREACH_THREAD(qx,x,q1d)
108 {
109 real_t u = 0.0;
110 real_t v = 0.0;
111 for (int dx = 0; dx < d1d; ++dx)
112 {
113 const real_t input = smem(0,0,dx,dy,dz);
114 u += input * B(qx,dx);
115 v += input * G(qx,dx);
116 }
117 smem(0,1,dz,dy,qx) = u;
118 smem(0,2,dz,dy,qx) = v;
119 }
120 }
121 }
122 MFEM_SYNC_THREAD;
123 MFEM_FOREACH_THREAD(dz,z,d1d)
124 {
125 MFEM_FOREACH_THREAD(qy,y,q1d)
126 {
127 MFEM_FOREACH_THREAD(qx,x,q1d)
128 {
129 real_t u = 0.0;
130 real_t v = 0.0;
131 real_t w = 0.0;
132 for (int dy = 0; dy < d1d; ++dy)
133 {
134 u += smem(0,2,dz,dy,qx) * B(qy,dy);
135 v += smem(0,1,dz,dy,qx) * G(qy,dy);
136 w += smem(0,1,dz,dy,qx) * B(qy,dy);
137 }
138 smem(1,0,dz,qy,qx) = u;
139 smem(1,1,dz,qy,qx) = v;
140 smem(1,2,dz,qy,qx) = w;
141 }
142 }
143 }
144 MFEM_SYNC_THREAD;
145 MFEM_FOREACH_THREAD(qz,z,q1d)
146 {
147 MFEM_FOREACH_THREAD(qy,y,q1d)
148 {
149 MFEM_FOREACH_THREAD(qx,x,q1d)
150 {
151 real_t u = 0.0;
152 real_t v = 0.0;
153 real_t w = 0.0;
154 for (int dz = 0; dz < d1d; ++dz)
155 {
156 u += smem(1,0,dz,qy,qx) * B(qz,dz);
157 v += smem(1,1,dz,qy,qx) * B(qz,dz);
158 w += smem(1,2,dz,qy,qx) * G(qz,dz);
159 }
160 dUdxi(qz,qy,qx,c,0) += u;
161 dUdxi(qz,qy,qx,c,1) += v;
162 dUdxi(qz,qy,qx,c,2) += w;
163 }
164 }
165 }
166 MFEM_SYNC_THREAD;
167 } // vdim
168}
169
170/**
171 * @brief Multi-component transpose gradient evaluation from DOFs to quadrature
172 * points in reference coordinates with contraction of the D vector.
173 *
174 * @tparam dim Dimension.
175 * @tparam d1d Number of degrees of freedom in 1D.
176 * @tparam q1d er of quadrature points in 1D.
177 * @param B Basis functions evaluated at quadrature points in column-major
178 * layout q1d x d1d.
179 * @param G Gradients of basis functions evaluated at quadrature points in
180 * column major layout q1d x d1d.
181 * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
182 * q1d x q1d x q1d.
183 * @param U Input vector q1d x q1d x q1d x dim.
184 * @param F Output vector that applied the gradient evaluation from DOFs to
185 * quadrature points in reference coordinates with contraction of the D operator
186 * on the input vector. Size is d1d x d1d x d1d x dim.
187 */
188template <int dim, int d1d, int q1d>
189static inline MFEM_HOST_DEVICE void
190CalcGradTSum(const tensor<real_t, q1d, d1d> &B,
191 const tensor<real_t, q1d, d1d> &G,
192 tensor<real_t, 2, 3, q1d, q1d, q1d> &smem,
193 const tensor<real_t, q1d, q1d, q1d, dim, dim> &U,
194 DeviceTensor<4, real_t> &F)
195{
196 for (int c = 0; c < dim; ++c)
197 {
198 MFEM_FOREACH_THREAD(qz, z, q1d)
199 {
200 MFEM_FOREACH_THREAD(qy, y, q1d)
201 {
202 MFEM_FOREACH_THREAD(dx, x, d1d)
203 {
204 real_t u = 0.0, v = 0.0, w = 0.0;
205 for (int qx = 0; qx < q1d; ++qx)
206 {
207 u += U(qx, qy, qz, 0, c) * G(qx, dx);
208 v += U(qx, qy, qz, 1, c) * B(qx, dx);
209 w += U(qx, qy, qz, 2, c) * B(qx, dx);
210 }
211 smem(0, 0, qz, qy, dx) = u;
212 smem(0, 1, qz, qy, dx) = v;
213 smem(0, 2, qz, qy, dx) = w;
214 }
215 }
216 }
217 MFEM_SYNC_THREAD;
218 MFEM_FOREACH_THREAD(qz, z, q1d)
219 {
220 MFEM_FOREACH_THREAD(dy, y, d1d)
221 {
222 MFEM_FOREACH_THREAD(dx, x, d1d)
223 {
224 real_t u = 0.0, v = 0.0, w = 0.0;
225 for (int qy = 0; qy < q1d; ++qy)
226 {
227 u += smem(0, 0, qz, qy, dx) * B(qy, dy);
228 v += smem(0, 1, qz, qy, dx) * G(qy, dy);
229 w += smem(0, 2, qz, qy, dx) * B(qy, dy);
230 }
231 smem(1, 0, qz, dy, dx) = u;
232 smem(1, 1, qz, dy, dx) = v;
233 smem(1, 2, qz, dy, dx) = w;
234 }
235 }
236 }
237 MFEM_SYNC_THREAD;
238 MFEM_FOREACH_THREAD(dz, z, d1d)
239 {
240 MFEM_FOREACH_THREAD(dy, y, d1d)
241 {
242 MFEM_FOREACH_THREAD(dx, x, d1d)
243 {
244 real_t u = 0.0, v = 0.0, w = 0.0;
245 for (int qz = 0; qz < q1d; ++qz)
246 {
247 u += smem(1, 0, qz, dy, dx) * B(qz, dz);
248 v += smem(1, 1, qz, dy, dx) * B(qz, dz);
249 w += smem(1, 2, qz, dy, dx) * G(qz, dz);
250 }
251 const real_t sum = u + v + w;
252 F(dx, dy, dz, c) += sum;
253 }
254 }
255 }
256 MFEM_SYNC_THREAD;
257 }
258}
259
260/**
261 * @brief Compute the gradient of all shape functions.
262 *
263 * @note TODO: Does not make use of shared memory on the GPU.
264 *
265 * @tparam dim Dimension.
266 * @tparam d1d Number of degrees of freedom in 1D.
267 * @tparam q1d er of quadrature points in 1D.
268 * @param qx Quadrature point x index.
269 * @param qy Quadrature point y index.
270 * @param qz Quadrature point z index.
271 * @param B Basis functions evaluated at quadrature points in column-major
272 * layout q1d x d1d.
273 * @param G Gradients of basis functions evaluated at quadrature points in
274 * column major layout q1d x d1d.
275 * @param invJ Inverse of the Jacobian at the quadrature point. Size is dim x
276 * dim.
277 *
278 * @return Gradient of all shape functions at the quadrature point. Size is d1d
279 * x d1d x d1d x dim.
280 */
281template <int dim, int d1d, int q1d>
282static inline MFEM_HOST_DEVICE tensor<real_t, d1d, d1d, d1d, dim>
283GradAllShapeFunctions(int qx, int qy, int qz,
284 const tensor<real_t, q1d, d1d> &B,
285 const tensor<real_t, q1d, d1d> &G,
286 const tensor<real_t, dim, dim> &invJ)
287{
288 tensor<real_t, d1d, d1d, d1d, dim> dphi_dx;
289 // G (x) B (x) B
290 // B (x) G (x) B
291 // B (x) B (x) G
292 for (int dx = 0; dx < d1d; dx++)
293 {
294 for (int dy = 0; dy < d1d; dy++)
295 {
296 for (int dz = 0; dz < d1d; dz++)
297 {
298 dphi_dx(dx,dy,dz) =
299 transpose(invJ) *
300 tensor<real_t, dim> {G(qx, dx) * B(qy, dy) * B(qz, dz),
301 B(qx, dx) * G(qy, dy) * B(qz, dz),
302 B(qx, dx) * B(qy, dy) * G(qz, dz)
303 };
304 }
305 }
306 }
307 return dphi_dx;
308}
309
310} // namespace KernelHelpers
311
312} // namespace mfem
313
314#endif
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:82
int dim
Definition: ex24.cpp:53
void CheckMemoryRestriction(int d1d, int q1d)
Runtime check for memory restrictions that are determined at compile time.
real_t u(const Vector &xvec)
Definition: lor_mms.hpp:22
float real_t
Definition: config.hpp:43
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition: forall.hpp:125
Implementation of the tensor class.