MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
lor_nd_impl.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "lor_util.hpp"
13#include "../../linalg/dtensor.hpp"
14#include "../../general/forall.hpp"
15
16namespace mfem
17{
18
19template <int ORDER, int SDIM>
21{
22 const int nel_ho = fes_ho.GetNE();
23
24 static constexpr int nv = 4;
25 static constexpr int ne = 4;
26 static constexpr int dim = 2;
27 static constexpr int ddm2 = (dim*(dim+1))/2;
28 static constexpr int ngeom = ddm2 + 1;
29 static constexpr int o = ORDER;
30 static constexpr int op1 = ORDER + 1;
31 static constexpr int ndof_per_el = dim*o*op1;
32 static constexpr int nnz_per_row = 7;
33 static constexpr int sz_local_mat = ne*ne;
34
35 const bool const_mq = c1.Size() == 1;
36 const auto MQ = const_mq
37 ? Reshape(c1.Read(), 1, 1, 1)
38 : Reshape(c1.Read(), op1, op1, nel_ho);
39 const bool const_dq = c2.Size() == 1;
40 const auto DQ = const_dq
41 ? Reshape(c2.Read(), 1, 1, 1)
42 : Reshape(c2.Read(), op1, op1, nel_ho);
43
44 sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
45 auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho);
46
47 auto X = X_vert.Read();
48
49 mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
50 {
51 // Assemble a sparse matrix over the macro-element by looping over each
52 // subelement.
53 // V(j,ix,iy) stores the jth nonzero in the row of the sparse matrix
54 // corresponding to local DOF (ix, iy).
55 MFEM_FOREACH_THREAD(iy,y,o)
56 {
57 MFEM_FOREACH_THREAD(ix,x,op1)
58 {
59 for (int c=0; c<2; ++c)
60 {
61 for (int j=0; j<nnz_per_row; ++j)
62 {
63 V(j,ix+iy*op1,c,iel_ho) = 0.0;
64 }
65 }
66 }
67 }
68 MFEM_SYNC_THREAD;
69
70 // Loop over the sub-elements
71 MFEM_FOREACH_THREAD(ky,y,ORDER)
72 {
73 MFEM_FOREACH_THREAD(kx,x,ORDER)
74 {
75 // Compute geometric factors at quadrature points
76 real_t Q_[ngeom*nv];
77 real_t local_mat_[sz_local_mat];
78
79 DeviceTensor<3> Q(Q_, ngeom, 2, 2);
80 DeviceTensor<2> local_mat(local_mat_, ne, ne);
81
82 // local_mat is the local (dense) stiffness matrix
83 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
84
85 SetupLORQuadData2D<ORDER,SDIM,false,true>(X, iel_ho, kx, ky, Q, true);
86
87 for (int iqx=0; iqx<2; ++iqx)
88 {
89 for (int iqy=0; iqy<2; ++iqy)
90 {
91 const real_t mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
92 const real_t dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
93
94 // Loop over x,y components. c=0 => x, c=1 => y
95 for (int cj=0; cj<dim; ++cj)
96 {
97 for (int bj=0; bj<2; ++bj)
98 {
99 const real_t curl_j = ((cj == 0) ? 1 : -1)*((bj == 0) ? 1 : -1);
100 const real_t bxj = (cj == 0) ? ((bj == iqy) ? 1 : 0) : 0;
101 const real_t byj = (cj == 1) ? ((bj == iqx) ? 1 : 0) : 0;
102
103 const real_t jj_loc = bj + 2*cj;
104
105 for (int ci=0; ci<dim; ++ci)
106 {
107 for (int bi=0; bi<2; ++bi)
108 {
109 const real_t curl_i = ((ci == 0) ? 1 : -1)*((bi == 0) ? 1 : -1);
110 const real_t bxi = (ci == 0) ? ((bi == iqy) ? 1 : 0) : 0;
111 const real_t byi = (ci == 1) ? ((bi == iqx) ? 1 : 0) : 0;
112
113 const real_t ii_loc = bi + 2*ci;
114
115 // Only store the lower-triangular part of
116 // the matrix (by symmetry).
117 if (jj_loc > ii_loc) { continue; }
118
119 real_t val = 0.0;
120 val += bxi*bxj*Q(0,iqy,iqx);
121 val += byi*bxj*Q(1,iqy,iqx);
122 val += bxi*byj*Q(1,iqy,iqx);
123 val += byi*byj*Q(2,iqy,iqx);
124 val *= mq;
125 val += dq*curl_i*curl_j*Q(3,iqy,iqx);
126
127 local_mat(ii_loc, jj_loc) += val;
128 }
129 }
130 }
131 }
132 }
133 }
134 // Assemble the local matrix into the macro-element sparse matrix
135 // in a format similar to coordinate format. The (I,J) arrays
136 // are implicit (not stored explicitly).
137 for (int ii_loc=0; ii_loc<ne; ++ii_loc)
138 {
139 const int ci = ii_loc/2;
140 const int bi = ii_loc%2;
141 const int ix = (ci == 0) ? 0 : bi;
142 const int iy = (ci == 1) ? 0 : bi;
143
144 int ii = (ci == 0) ? kx+ix + (ky+iy)*o : kx+ix + (ky+iy)*op1;
145
146 for (int jj_loc=0; jj_loc<ne; ++jj_loc)
147 {
148 const int cj = jj_loc/2;
149 const int bj = jj_loc%2;
150
151 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + bj + (1-bi)*2);
152
153 // Symmetry
154 const real_t val = (jj_loc <= ii_loc)
155 ? local_mat(ii_loc, jj_loc)
156 : local_mat(jj_loc, ii_loc);
157 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
158 }
159 }
160 }
161 }
162 });
163
164 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
165 sparse_mapping = -1;
166 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
167 for (int ci=0; ci<2; ++ci)
168 {
169 for (int i1=0; i1<o; ++i1)
170 {
171 for (int i2=0; i2<op1; ++i2)
172 {
173 const int ii_el = (ci == 0) ? i1 + i2*o : i2 + i1*op1 + o*op1;
174 for (int cj=0; cj<2; ++cj)
175 {
176 const int j1_begin = (ci == cj) ? i1 : ((i2 > 0) ? i2-1 : i2);
177 const int j1_end = (ci == cj) ? i1 : ((i2 < o) ? i2 : i2-1);
178 const int j2_begin = (ci == cj) ? ((i2 > 0) ? i2-1 : i2) : i1;
179 const int j2_end = (ci == cj) ? ((i2 < o) ? i2+1 : i2) : i1+1;
180
181 for (int j1=j1_begin; j1<=j1_end; ++j1)
182 {
183 for (int j2=j2_begin; j2<=j2_end; ++j2)
184 {
185 const int jj_el = (cj == 0) ? j1 + j2*o : j2 + j1*op1 + o*op1;
186 int jj_off = (ci == cj) ? (j2-i2+1) : 3 + (j2-i1) + 2*(j1-i2+1);
187 map(jj_off, ii_el) = jj_el;
188 }
189 }
190 }
191 }
192 }
193 }
194}
195
196template <int ORDER>
198{
199 const int nel_ho = fes_ho.GetNE();
200
201 static constexpr int nv = 8; // number of vertices in hexahedron
202 static constexpr int ne = 12; // number of edges in hexahedron
203 static constexpr int dim = 3;
204 static constexpr int ddm2 = (dim*(dim+1))/2;
205 static constexpr int ngeom = 2*ddm2; // number of geometric factors stored
206 static constexpr int o = ORDER;
207 static constexpr int op1 = ORDER + 1;
208 static constexpr int ndof_per_el = dim*o*op1*op1;
209 static constexpr int nnz_per_row = 33;
210 static constexpr int sz_local_mat = ne*ne;
211
212 const bool const_mq = c1.Size() == 1;
213 const auto MQ = const_mq
214 ? Reshape(c1.Read(), 1, 1, 1, 1)
215 : Reshape(c1.Read(), op1, op1, op1, nel_ho);
216 const bool const_dq = c2.Size() == 1;
217 const auto DQ = const_dq
218 ? Reshape(c2.Read(), 1, 1, 1, 1)
219 : Reshape(c2.Read(), op1, op1, op1, nel_ho);
220
221 sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
222 auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1*op1, dim, nel_ho);
223
224 auto X = X_vert.Read();
225
226 // Last thread dimension is lowered to avoid "too many resources" error
227 mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
228 [=] MFEM_HOST_DEVICE (int iel_ho)
229 {
230 MFEM_FOREACH_THREAD(iz,z,o)
231 {
232 MFEM_FOREACH_THREAD(iy,y,op1)
233 {
234 MFEM_FOREACH_THREAD(ix,x,op1)
235 {
236 for (int c=0; c<dim; ++c)
237 {
238 for (int j=0; j<nnz_per_row; ++j)
239 {
240 V(j,ix+iy*op1+iz*op1*op1,c,iel_ho) = 0.0;
241 }
242 }
243 }
244 }
245 }
246 MFEM_SYNC_THREAD;
247
248 // Loop over the sub-elements
249 MFEM_FOREACH_THREAD(kz,z,ORDER)
250 {
251 MFEM_FOREACH_THREAD(ky,y,ORDER)
252 {
253 MFEM_FOREACH_THREAD(kx,x,ORDER)
254 {
255 // Geometric factors at quadrature points (element vertices)
256 real_t Q_[ngeom*nv];
257 DeviceTensor<4> Q(Q_, ngeom, 2, 2, 2);
258
259 real_t local_mat_[sz_local_mat];
260 DeviceTensor<2> local_mat(local_mat_, ne, ne);
261 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
262
263 real_t vx[8], vy[8], vz[8];
264 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
265
266 for (int iqz=0; iqz<2; ++iqz)
267 {
268 for (int iqy=0; iqy<2; ++iqy)
269 {
270 for (int iqx=0; iqx<2; ++iqx)
271 {
272 const real_t x = iqx;
273 const real_t y = iqy;
274 const real_t z = iqz;
275 const real_t w = 1.0/8.0;
276
277 real_t J_[3*3];
278 DeviceTensor<2> J(J_, 3, 3);
279
280 Jacobian3D(x, y, z, vx, vy, vz, J);
281
282 const real_t detJ = Det3D(J);
283 const real_t w_detJ = w/detJ;
284
285 // adj(J)
286 real_t A_[3*3];
287 DeviceTensor<2> A(A_, 3, 3);
288 Adjugate3D(J, A);
289
290 Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2)); // 1,1
291 Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2)); // 2,1
292 Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2)); // 3,1
293 Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2)); // 2,2
294 Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2)); // 3,2
295 Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2)); // 3,3
296
297 // w J^T J / det(J)
298 Q(6,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1
299 Q(7,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,1)+J(1,0)*J(1,1)+J(2,0)*J(2,1)); // 2,1
300 Q(8,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,2)+J(1,0)*J(1,2)+J(2,0)*J(2,2)); // 3,1
301 Q(9,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2
302 Q(10,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,2)+J(1,1)*J(1,2)+J(2,1)*J(2,2)); // 3,2
303 Q(11,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3
304 }
305 }
306 }
307 for (int iqz=0; iqz<2; ++iqz)
308 {
309 for (int iqy=0; iqy<2; ++iqy)
310 {
311 for (int iqx=0; iqx<2; ++iqx)
312 {
313 const real_t mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
314 const real_t dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
315 // Loop over x,y,z components. 0 => x, 1 => y, 2 => z
316 for (int cj=0; cj<dim; ++cj)
317 {
318 const real_t jq1 = (cj == 0) ? iqy : ((cj == 1) ? iqz : iqx);
319 const real_t jq2 = (cj == 0) ? iqz : ((cj == 1) ? iqx : iqy);
320
321 const int jd_0 = cj;
322 const int jd_1 = (cj + 1)%3;
323 const int jd_2 = (cj + 2)%3;
324
325 for (int bj=0; bj<4; ++bj) // 4 edges in each dim
326 {
327 const int bj1 = bj%2;
328 const int bj2 = bj/2;
329
330 real_t curl_j[3];
331 curl_j[jd_0] = 0.0;
332 curl_j[jd_1] = ((bj1 == 0) ? jq1 - 1 : -jq1)*((bj2 == 0) ? 1 : -1);
333 curl_j[jd_2] = ((bj2 == 0) ? 1 - jq2 : jq2)*((bj1 == 0) ? 1 : -1);
334
335 real_t basis_j[3];
336 basis_j[jd_0] = ((bj1 == 0) ? 1 - jq1 : jq1)*((bj2 == 0) ? 1 - jq2 : jq2);
337 basis_j[jd_1] = 0.0;
338 basis_j[jd_2] = 0.0;
339
340 const int jj_loc = bj + 4*cj;
341
342 for (int ci=0; ci<dim; ++ci)
343 {
344 const real_t iq1 = (ci == 0) ? iqy : ((ci == 1) ? iqz : iqx);
345 const real_t iq2 = (ci == 0) ? iqz : ((ci == 1) ? iqx : iqy);
346
347 const int id_0 = ci;
348 const int id_1 = (ci + 1)%3;
349 const int id_2 = (ci + 2)%3;
350
351 for (int bi=0; bi<4; ++bi)
352 {
353 const int bi1 = bi%2;
354 const int bi2 = bi/2;
355
356 real_t curl_i[3];
357 curl_i[id_0] = 0.0;
358 curl_i[id_1] = ((bi1 == 0) ? iq1 - 1 : -iq1)*((bi2 == 0) ? 1 : -1);
359 curl_i[id_2] = ((bi2 == 0) ? 1 - iq2 : iq2)*((bi1 == 0) ? 1 : -1);
360
361 real_t basis_i[3];
362 basis_i[id_0] = ((bi1 == 0) ? 1 - iq1 : iq1)*((bi2 == 0) ? 1 - iq2 : iq2);
363 basis_i[id_1] = 0.0;
364 basis_i[id_2] = 0.0;
365
366 const int ii_loc = bi + 4*ci;
367
368 // Only store the lower-triangular part of
369 // the matrix (by symmetry).
370 if (jj_loc > ii_loc) { continue; }
371
372 real_t curl_curl = 0.0;
373 curl_curl += Q(6,iqz,iqy,iqx)*curl_i[0]*curl_j[0];
374 curl_curl += Q(7,iqz,iqy,iqx)*(curl_i[0]*curl_j[1] + curl_i[1]*curl_j[0]);
375 curl_curl += Q(8,iqz,iqy,iqx)*(curl_i[0]*curl_j[2] + curl_i[2]*curl_j[0]);
376 curl_curl += Q(9,iqz,iqy,iqx)*curl_i[1]*curl_j[1];
377 curl_curl += Q(10,iqz,iqy,iqx)*(curl_i[1]*curl_j[2] + curl_i[2]*curl_j[1]);
378 curl_curl += Q(11,iqz,iqy,iqx)*curl_i[2]*curl_j[2];
379
380 real_t basis_basis = 0.0;
381 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
382 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
383 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
384 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
385 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
386 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
387
388 const real_t val = dq*curl_curl + mq*basis_basis;
389
390 local_mat(ii_loc, jj_loc) += val;
391 }
392 }
393 }
394 }
395 }
396 }
397 }
398 // Assemble the local matrix into the macro-element sparse matrix
399 // The nonzeros of the macro-element sparse matrix are ordered as
400 // follows:
401 //
402 // The axes are ordered relative to the direction of the basis
403 // vector, e.g. for x-vectors, the axes are (x,y,z), for
404 // y-vectors the axes are (y,z,x), and for z-vectors the axes are
405 // (z,x,y).
406 //
407 // The nonzeros are then given in "rotated lexicographic"
408 // ordering, according to these axes.
409 for (int ii_loc=0; ii_loc<ne; ++ii_loc)
410 {
411 const int ci = ii_loc/4;
412 const int bi = ii_loc%4;
413
414 const int id0 = ci;
415 const int id1 = (ci+1)%3;
416 const int id2 = (ci+2)%3;
417
418 const int i0 = 0;
419 const int i1 = bi%2;
420 const int i2 = bi/2;
421
422 int ii_lex[3];
423 ii_lex[id0] = i0;
424 ii_lex[id1] = i1;
425 ii_lex[id2] = i2;
426
427 const int nx = (ci == 0) ? o : op1;
428 const int ny = (ci == 1) ? o : op1;
429
430 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
431
432 for (int jj_loc=0; jj_loc<ne; ++jj_loc)
433 {
434 const int cj = jj_loc/4;
435 // add 3 to take modulus (rather than remainder) when
436 // (cj - ci) is negative
437 const int cj_rel = (3 + cj - ci)%3;
438
439 const int bj = jj_loc%4;
440
441 const int jd0 = cj_rel;
442 const int jd1 = (cj_rel+1)%3;
443 const int jd2 = (cj_rel+2)%3;
444
445 int jj_rel[3];
446 jj_rel[jd0] = 0;
447 jj_rel[jd1] = bj%2;
448 jj_rel[jd2] = bj/2;
449
450 const int d0 = jj_rel[0] - i0;
451 const int d1 = 1 + jj_rel[1] - i1;
452 const int d2 = 1 + jj_rel[2] - i2;
453 int jj_off;
454 if (cj_rel == 0) { jj_off = d1 + 3*d2; }
455 else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
456 else /* if (cj_rel == 2) */ { jj_off = 21 + d0 + 2*d1 + 6*d2; }
457
458 // Symmetry
459 const real_t val = (jj_loc <= ii_loc)
460 ? local_mat(ii_loc, jj_loc)
461 : local_mat(jj_loc, ii_loc);
462 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
463 }
464 }
465 }
466 }
467 }
468 });
469
470 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
471 sparse_mapping = -1;
472 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
473 for (int ci=0; ci<dim; ++ci)
474 {
475 const int i_off = ci*o*op1*op1;
476 const int id0 = ci;
477 const int id1 = (ci+1)%3;
478 const int id2 = (ci+2)%3;
479
480 const int nxi = (ci == 0) ? o : op1;
481 const int nyi = (ci == 1) ? o : op1;
482
483 for (int i0=0; i0<o; ++i0)
484 {
485 for (int i1=0; i1<op1; ++i1)
486 {
487 for (int i2=0; i2<op1; ++i2)
488 {
489 int ii_lex[3];
490 ii_lex[id0] = i0;
491 ii_lex[id1] = i1;
492 ii_lex[id2] = i2;
493 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
494
495 for (int cj_rel=0; cj_rel<dim; ++cj_rel)
496 {
497 const int cj = (ci + cj_rel) % 3;
498 const int j_off = cj*o*op1*op1;
499
500 const int nxj = (cj == 0) ? o : op1;
501 const int nyj = (cj == 1) ? o : op1;
502
503 const int j0_begin = i0;
504 const int j0_end = (cj_rel == 0) ? i0 : i0 + 1;
505 const int j1_begin = (i1 > 0) ? i1-1 : i1;
506 const int j1_end = (cj_rel == 1)
507 ? ((i1 < o) ? i1 : i1-1)
508 : ((i1 < o) ? i1+1 : i1);
509 const int j2_begin = (i2 > 0) ? i2-1 : i2;
510 const int j2_end = (cj_rel == 2)
511 ? ((i2 < o) ? i2 : i2-1)
512 : ((i2 < o) ? i2+1 : i2);
513
514 for (int j0=j0_begin; j0<=j0_end; ++j0)
515 {
516 const int d0 = j0 - i0;
517 for (int j1=j1_begin; j1<=j1_end; ++j1)
518 {
519 const int d1 = j1 - i1 + 1;
520 for (int j2=j2_begin; j2<=j2_end; ++j2)
521 {
522 const int d2 = j2 - i2 + 1;
523 int jj_lex[3];
524 jj_lex[id0] = j0;
525 jj_lex[id1] = j1;
526 jj_lex[id2] = j2;
527 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
528 int jj_off;
529 if (cj_rel == 0) { jj_off = d1 + 3*d2; }
530 else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
531 else /* if (cj_rel == 2) */ { jj_off = 21 + d0 + 2*d1 + 6*d2; }
532 map(jj_off, ii_el) = jj_el;
533 }
534 }
535 }
536 }
537 }
538 }
539 }
540 }
541}
542
543} // namespace mfem
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition: backends.hpp:92
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:697
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition: array.hpp:337
FiniteElementSpace & fes_ho
The associated high-order space.
CoefficientVector c2
Coefficient of second integrator.
Vector & X_vert
Mesh coordinate vector.
Vector & sparse_ij
Local element sparsity matrix data.
CoefficientVector c1
Coefficient of first integrator.
Array< int > & sparse_mapping
Local element sparsity pattern.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:82
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:740
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:474
int Size() const
Returns the size of the vector.
Definition: vector.hpp:218
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:538
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:482
int dim
Definition: ex24.cpp:53
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
MFEM_HOST_DEVICE void Jacobian3D(const real_t x, const real_t y, const real_t z, const real_t vx[8], const real_t vy[8], const real_t vz[8], DeviceMatrix &J)
Definition: lor_util.hpp:210
void forall_2D(int N, int X, int Y, lambda &&body)
Definition: forall.hpp:763
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:775
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:28
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
Definition: lor_util.hpp:254
float real_t
Definition: config.hpp:43