MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
lor_h1_impl.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "lor_util.hpp"
13#include "../../linalg/dtensor.hpp"
14#include "../../general/forall.hpp"
15
16namespace mfem
17{
18
19template <int ORDER, int SDIM>
21{
22 const int nel_ho = fes_ho.GetNE();
23
24 static constexpr int nv = 4;
25 static constexpr int dim = 2;
26 static constexpr int ddm2 = (dim*(dim+1))/2;
27 static constexpr int nd1d = ORDER + 1;
28 static constexpr int ndof_per_el = nd1d*nd1d;
29 static constexpr int nnz_per_row = 9;
30 static constexpr int sz_local_mat = nv*nv;
31
32 const bool const_mq = c1.Size() == 1;
33 const auto MQ = const_mq
34 ? Reshape(c1.Read(), 1, 1, 1)
35 : Reshape(c1.Read(), nd1d, nd1d, nel_ho);
36 const bool const_dq = c2.Size() == 1;
37 const auto DQ = const_dq
38 ? Reshape(c2.Read(), 1, 1, 1)
39 : Reshape(c2.Read(), nd1d, nd1d, nel_ho);
40
41 sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
42 auto V = Reshape(sparse_ij.Write(), nnz_per_row, nd1d, nd1d, nel_ho);
43
44 auto X = X_vert.Read();
45
46 mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
47 {
48 // Assemble a sparse matrix over the macro-element by looping over each
49 // subelement.
50 // V(j,ix,iy) stores the jth nonzero in the row of the sparse matrix
51 // corresponding to local DOF (ix, iy).
52 MFEM_FOREACH_THREAD(iy,y,nd1d)
53 {
54 MFEM_FOREACH_THREAD(ix,x,nd1d)
55 {
56 for (int j=0; j<nnz_per_row; ++j)
57 {
58 V(j,ix,iy,iel_ho) = 0.0;
59 }
60 }
61 }
62 MFEM_SYNC_THREAD;
63
64 // Compute geometric factors at quadrature points
65 MFEM_FOREACH_THREAD(ky,y,ORDER)
66 {
67 MFEM_FOREACH_THREAD(kx,x,ORDER)
68 {
69 real_t Q_[(ddm2 + 1)*nv];
70 real_t local_mat_[sz_local_mat];
71 DeviceTensor<3> Q(Q_, ddm2 + 1, 2, 2);
72 DeviceTensor<2> local_mat(local_mat_, nv, nv);
73
74 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
75
76 SetupLORQuadData2D<ORDER,SDIM,false,false>(X, iel_ho, kx, ky, Q, false);
77
78 for (int iqx=0; iqx<2; ++iqx)
79 {
80 for (int iqy=0; iqy<2; ++iqy)
81 {
82 const real_t mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
83 const real_t dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
84 for (int jy=0; jy<2; ++jy)
85 {
86 const real_t bjy = (jy == iqy) ? 1.0 : 0.0;
87 const real_t gjy = (jy == 0) ? -1.0 : 1.0;
88 for (int jx=0; jx<2; ++jx)
89 {
90 const real_t bjx = (jx == iqx) ? 1.0 : 0.0;
91 const real_t gjx = (jx == 0) ? -1.0 : 1.0;
92
93 const real_t djx = gjx*bjy;
94 const real_t djy = bjx*gjy;
95
96 int jj_loc = jx + 2*jy;
97
98 for (int iy=0; iy<2; ++iy)
99 {
100 const real_t biy = (iy == iqy) ? 1.0 : 0.0;
101 const real_t giy = (iy == 0) ? -1.0 : 1.0;
102 for (int ix=0; ix<2; ++ix)
103 {
104 const real_t bix = (ix == iqx) ? 1.0 : 0.0;
105 const real_t gix = (ix == 0) ? -1.0 : 1.0;
106
107 const real_t dix = gix*biy;
108 const real_t diy = bix*giy;
109
110 int ii_loc = ix + 2*iy;
111
112 // Only store the lower-triangular part of
113 // the matrix (by symmetry).
114 if (jj_loc > ii_loc) { continue; }
115
116 real_t val = 0.0;
117 val += dix*djx*Q(0,iqy,iqx);
118 val += (dix*djy + diy*djx)*Q(1,iqy,iqx);
119 val += diy*djy*Q(2,iqy,iqx);
120 val *= dq;
121
122 val += mq*bix*biy*bjx*bjy*Q(3,iqy,iqx);
123
124 local_mat(ii_loc, jj_loc) += val;
125 }
126 }
127 }
128 }
129 }
130 }
131 // Assemble the local matrix into the macro-element sparse matrix
132 // in a format similar to coordinate format. The (I,J) arrays
133 // are implicit (not stored explicitly).
134 for (int ii_loc=0; ii_loc<nv; ++ii_loc)
135 {
136 const int ix = ii_loc%2;
137 const int iy = ii_loc/2;
138 for (int jj_loc=0; jj_loc<nv; ++jj_loc)
139 {
140 const int jx = jj_loc%2;
141 const int jy = jj_loc/2;
142 const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
143
144 // Symmetry
145 if (jj_loc <= ii_loc)
146 {
147 AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(ii_loc, jj_loc));
148 }
149 else
150 {
151 AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(jj_loc, ii_loc));
152 }
153 }
154 }
155 }
156 }
157 });
158
159 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
160 sparse_mapping = -1;
161 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
162 for (int iy=0; iy<nd1d; ++iy)
163 {
164 const int jy_begin = (iy > 0) ? iy - 1 : 0;
165 const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
166 for (int ix=0; ix<nd1d; ++ix)
167 {
168 const int jx_begin = (ix > 0) ? ix - 1 : 0;
169 const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
170 const int ii_el = ix + nd1d*iy;
171 for (int jy=jy_begin; jy<=jy_end; ++jy)
172 {
173 for (int jx=jx_begin; jx<=jx_end; ++jx)
174 {
175 const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
176 const int jj_el = jx + nd1d*jy;
177 map(jj_off, ii_el) = jj_el;
178 }
179 }
180 }
181 }
182}
183
184template <int ORDER>
186{
187 const int nel_ho = fes_ho.GetNE();
188 static constexpr int nv = 8;
189 static constexpr int dim = 3;
190 static constexpr int ddm2 = (dim*(dim+1))/2;
191 static constexpr int nd1d = ORDER + 1;
192 static constexpr int ndof_per_el = nd1d*nd1d*nd1d;
193 static constexpr int nnz_per_row = 27;
194 static constexpr int sz_grad_A = 3*3*2*2*2*2;
195 static constexpr int sz_grad_B = sz_grad_A*2;
196 static constexpr int sz_mass_A = 2*2*2*2;
197 static constexpr int sz_mass_B = sz_mass_A*2;
198 static constexpr int sz_local_mat = nv*nv;
199
200 const bool const_mq = c1.Size() == 1;
201 const auto MQ = const_mq
202 ? Reshape(c1.Read(), 1, 1, 1, 1)
203 : Reshape(c1.Read(), nd1d, nd1d, nd1d, nel_ho);
204 const bool const_dq = c2.Size() == 1;
205 const auto DQ = const_dq
206 ? Reshape(c2.Read(), 1, 1, 1, 1)
207 : Reshape(c2.Read(), nd1d, nd1d, nd1d, nel_ho);
208
209 sparse_ij.SetSize(nel_ho*ndof_per_el*nnz_per_row);
210 auto V = Reshape(sparse_ij.Write(), nnz_per_row, nd1d, nd1d, nd1d, nel_ho);
211
212 auto X = X_vert.Read();
213
214 // Last thread dimension is lowered to avoid "too many resources" error
215 mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
216 [=] MFEM_HOST_DEVICE (int iel_ho)
217 {
218 // Assemble a sparse matrix over the macro-element by looping over each
219 // subelement.
220 // V(j,i) stores the jth nonzero in the ith row of the sparse matrix.
221 MFEM_FOREACH_THREAD(iz,z,nd1d)
222 {
223 MFEM_FOREACH_THREAD(iy,y,nd1d)
224 {
225 MFEM_FOREACH_THREAD(ix,x,nd1d)
226 {
227 MFEM_UNROLL(nnz_per_row)
228 for (int j=0; j<nnz_per_row; ++j)
229 {
230 V(j,ix,iy,iz,iel_ho) = 0.0;
231 }
232 }
233 }
234 }
235 MFEM_SYNC_THREAD;
236
237 // Compute geometric factors at quadrature points
238 MFEM_FOREACH_THREAD(kz,z,ORDER)
239 {
240 MFEM_FOREACH_THREAD(ky,y,ORDER)
241 {
242 MFEM_FOREACH_THREAD(kx,x,ORDER)
243 {
244 real_t Q_[(ddm2 + 1)*nv];
245 real_t grad_A_[sz_grad_A];
246 real_t grad_B_[sz_grad_B];
247 real_t mass_A_[sz_mass_A];
248 real_t mass_B_[sz_mass_B];
249 real_t local_mat_[sz_local_mat];
250
251 DeviceTensor<4> Q(Q_, ddm2 + 1, 2, 2, 2);
252 DeviceTensor<2> local_mat(local_mat_, nv, nv);
253 DeviceTensor<6> grad_A(grad_A_, 3, 3, 2, 2, 2, 2);
254 DeviceTensor<7> grad_B(grad_B_, 3, 3, 2, 2, 2, 2, 2);
255 DeviceTensor<4> mass_A(mass_A_, 2, 2, 2, 2);
256 DeviceTensor<5> mass_B(mass_B_, 2, 2, 2, 2, 2);
257
258 // local_mat is the local (dense) stiffness matrix
259 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
260
261 // Intermediate quantities
262 // (see e.g. Mora and Demkowicz for notation).
263 for (int i=0; i<sz_grad_A; ++i) { grad_A[i] = 0.0; }
264 for (int i=0; i<sz_grad_B; ++i) { grad_B[i] = 0.0; }
265
266 for (int i=0; i<sz_mass_A; ++i) { mass_A[i] = 0.0; }
267 for (int i=0; i<sz_mass_B; ++i) { mass_B[i] = 0.0; }
268
269 real_t vx[8], vy[8], vz[8];
270 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
271
272 // MFEM_UNROLL(2)
273 for (int iqz=0; iqz<2; ++iqz)
274 {
275 // MFEM_UNROLL(2)
276 for (int iqy=0; iqy<2; ++iqy)
277 {
278 // MFEM_UNROLL(2)
279 for (int iqx=0; iqx<2; ++iqx)
280 {
281 const real_t x = iqx;
282 const real_t y = iqy;
283 const real_t z = iqz;
284 const real_t w = 1.0/8.0;
285
286 real_t J_[3*3];
287 DeviceTensor<2> J(J_, 3, 3);
288
289 Jacobian3D(x, y, z, vx, vy, vz, J);
290
291 const real_t detJ = Det3D(J);
292 const real_t w_detJ = w/detJ;
293
294 // adj(J)
295 real_t A_[3*3];
296 DeviceTensor<2> A(A_, 3, 3);
297 Adjugate3D(J, A);
298
299 Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2)); // 1,1
300 Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2)); // 2,1
301 Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2)); // 3,1
302 Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2)); // 2,2
303 Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2)); // 3,2
304 Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2)); // 3,3
305 Q(6,iqz,iqy,iqx) = w*detJ;
306 }
307 }
308 }
309
310 // MFEM_UNROLL(2)
311 for (int iqx=0; iqx<2; ++iqx)
312 {
313 // MFEM_UNROLL(2)
314 for (int jz=0; jz<2; ++jz)
315 {
316 // Note loop starts at iz=jz here, taking advantage of
317 // symmetries.
318 // MFEM_UNROLL(2)
319 for (int iz=jz; iz<2; ++iz)
320 {
321 // MFEM_UNROLL(2)
322 for (int iqy=0; iqy<2; ++iqy)
323 {
324 // MFEM_UNROLL(2)
325 for (int iqz=0; iqz<2; ++iqz)
326 {
327 const real_t mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
328 const real_t dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
329
330 const real_t biz = (iz == iqz) ? 1.0 : 0.0;
331 const real_t giz = (iz == 0) ? -1.0 : 1.0;
332
333 const real_t bjz = (jz == iqz) ? 1.0 : 0.0;
334 const real_t gjz = (jz == 0) ? -1.0 : 1.0;
335
336 const real_t J11 = Q(0,iqz,iqy,iqx);
337 const real_t J21 = Q(1,iqz,iqy,iqx);
338 const real_t J31 = Q(2,iqz,iqy,iqx);
339 const real_t J12 = J21;
340 const real_t J22 = Q(3,iqz,iqy,iqx);
341 const real_t J32 = Q(4,iqz,iqy,iqx);
342 const real_t J13 = J31;
343 const real_t J23 = J32;
344 const real_t J33 = Q(5,iqz,iqy,iqx);
345
346 grad_A(0,0,iqy,iz,jz,iqx) += dq*J11*biz*bjz;
347 grad_A(1,0,iqy,iz,jz,iqx) += dq*J21*biz*bjz;
348 grad_A(2,0,iqy,iz,jz,iqx) += dq*J31*giz*bjz;
349 grad_A(0,1,iqy,iz,jz,iqx) += dq*J12*biz*bjz;
350 grad_A(1,1,iqy,iz,jz,iqx) += dq*J22*biz*bjz;
351 grad_A(2,1,iqy,iz,jz,iqx) += dq*J32*giz*bjz;
352 grad_A(0,2,iqy,iz,jz,iqx) += dq*J13*biz*gjz;
353 grad_A(1,2,iqy,iz,jz,iqx) += dq*J23*biz*gjz;
354 grad_A(2,2,iqy,iz,jz,iqx) += dq*J33*giz*gjz;
355
356 real_t wdetJ = Q(6,iqz,iqy,iqx);
357 mass_A(iqy,iz,jz,iqx) += mq*wdetJ*biz*bjz;
358 }
359 // MFEM_UNROLL(2)
360 for (int jy=0; jy<2; ++jy)
361 {
362 // MFEM_UNROLL(2)
363 for (int iy=0; iy<2; ++iy)
364 {
365 const real_t biy = (iy == iqy) ? 1.0 : 0.0;
366 const real_t giy = (iy == 0) ? -1.0 : 1.0;
367
368 const real_t bjy = (jy == iqy) ? 1.0 : 0.0;
369 const real_t gjy = (jy == 0) ? -1.0 : 1.0;
370
371 grad_B(0,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,0,iqy,iz,jz,iqx);
372 grad_B(1,0,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,0,iqy,iz,jz,iqx);
373 grad_B(2,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,0,iqy,iz,jz,iqx);
374 grad_B(0,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(0,1,iqy,iz,jz,iqx);
375 grad_B(1,1,iy,jy,iz,jz,iqx) += giy*gjy*grad_A(1,1,iqy,iz,jz,iqx);
376 grad_B(2,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(2,1,iqy,iz,jz,iqx);
377 grad_B(0,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,2,iqy,iz,jz,iqx);
378 grad_B(1,2,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,2,iqy,iz,jz,iqx);
379 grad_B(2,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,2,iqy,iz,jz,iqx);
380
381 mass_B(iy,jy,iz,jz,iqx) += biy*bjy*mass_A(iqy,iz,jz,iqx);
382 }
383 }
384 }
385 // MFEM_UNROLL(2)
386 for (int jy=0; jy<2; ++jy)
387 {
388 // MFEM_UNROLL(2)
389 for (int jx=0; jx<2; ++jx)
390 {
391 // MFEM_UNROLL(2)
392 for (int iy=0; iy<2; ++iy)
393 {
394 // MFEM_UNROLL(2)
395 for (int ix=0; ix<2; ++ix)
396 {
397 const real_t bix = (ix == iqx) ? 1.0 : 0.0;
398 const real_t gix = (ix == 0) ? -1.0 : 1.0;
399
400 const real_t bjx = (jx == iqx) ? 1.0 : 0.0;
401 const real_t gjx = (jx == 0) ? -1.0 : 1.0;
402
403 int ii_loc = ix + 2*iy + 4*iz;
404 int jj_loc = jx + 2*jy + 4*jz;
405
406 // Only store the lower-triangular part of
407 // the matrix (by symmetry).
408 if (jj_loc > ii_loc) { continue; }
409
410 real_t val = 0.0;
411 val += gix*gjx*grad_B(0,0,iy,jy,iz,jz,iqx);
412 val += bix*gjx*grad_B(1,0,iy,jy,iz,jz,iqx);
413 val += bix*gjx*grad_B(2,0,iy,jy,iz,jz,iqx);
414 val += gix*bjx*grad_B(0,1,iy,jy,iz,jz,iqx);
415 val += bix*bjx*grad_B(1,1,iy,jy,iz,jz,iqx);
416 val += bix*bjx*grad_B(2,1,iy,jy,iz,jz,iqx);
417 val += gix*bjx*grad_B(0,2,iy,jy,iz,jz,iqx);
418 val += bix*bjx*grad_B(2,2,iy,jy,iz,jz,iqx);
419 val += bix*bjx*grad_B(1,2,iy,jy,iz,jz,iqx);
420
421 val += bix*bjx*mass_B(iy,jy,iz,jz,iqx);
422
423 local_mat(ii_loc, jj_loc) += val;
424 }
425 }
426 }
427 }
428 }
429 }
430 }
431 // Assemble the local matrix into the macro-element sparse matrix
432 // in a format similar to coordinate format. The (I,J) arrays
433 // are implicit (not stored explicitly).
434 // MFEM_UNROLL(8)
435 for (int ii_loc=0; ii_loc<nv; ++ii_loc)
436 {
437 const int ix = ii_loc%2;
438 const int iy = (ii_loc/2)%2;
439 const int iz = ii_loc/2/2;
440
441 for (int jj_loc=0; jj_loc<nv; ++jj_loc)
442 {
443 const int jx = jj_loc%2;
444 const int jy = (jj_loc/2)%2;
445 const int jz = jj_loc/2/2;
446 const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
447
448 if (jj_loc <= ii_loc)
449 {
450 AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(ii_loc, jj_loc));
451 }
452 else
453 {
454 AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(jj_loc, ii_loc));
455 }
456 }
457 }
458 }
459 }
460 }
461 });
462
463 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
464 sparse_mapping = -1;
465 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
466 for (int iz=0; iz<nd1d; ++iz)
467 {
468 const int jz_begin = (iz > 0) ? iz - 1 : 0;
469 const int jz_end = (iz < ORDER) ? iz + 1 : ORDER;
470 for (int iy=0; iy<nd1d; ++iy)
471 {
472 const int jy_begin = (iy > 0) ? iy - 1 : 0;
473 const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
474 for (int ix=0; ix<nd1d; ++ix)
475 {
476 const int jx_begin = (ix > 0) ? ix - 1 : 0;
477 const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
478
479 const int ii_el = ix + nd1d*(iy + nd1d*iz);
480
481 for (int jz=jz_begin; jz<=jz_end; ++jz)
482 {
483 for (int jy=jy_begin; jy<=jy_end; ++jy)
484 {
485 for (int jx=jx_begin; jx<=jx_end; ++jx)
486 {
487 const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
488 const int jj_el = jx + nd1d*(jy + nd1d*jz);
489 map(jj_off, ii_el) = jj_el;
490 }
491 }
492 }
493 }
494 }
495 }
496}
497
498} // namespace mfem
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition: backends.hpp:92
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:697
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition: array.hpp:337
FiniteElementSpace & fes_ho
The associated high-order space.
CoefficientVector c2
Coefficient of second integrator.
Vector & X_vert
Mesh coordinate vector.
Vector & sparse_ij
Local element sparsity matrix data.
CoefficientVector c1
Coefficient of first integrator.
Array< int > & sparse_mapping
Local element sparsity pattern.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:82
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:740
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:474
int Size() const
Returns the size of the vector.
Definition: vector.hpp:218
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:538
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:482
int dim
Definition: ex24.cpp:53
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
MFEM_HOST_DEVICE void Jacobian3D(const real_t x, const real_t y, const real_t z, const real_t vx[8], const real_t vy[8], const real_t vz[8], DeviceMatrix &J)
Definition: lor_util.hpp:210
void forall_2D(int N, int X, int Y, lambda &&body)
Definition: forall.hpp:763
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:775
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:28
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
Definition: lor_util.hpp:254
float real_t
Definition: config.hpp:43