MFEM  v4.6.0
Finite element discretization library
kernels.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_LINALG_KERNELS_HPP
13 #define MFEM_LINALG_KERNELS_HPP
14 
15 #include "../config/config.hpp"
16 #include "../general/backends.hpp"
17 #include "../general/globals.hpp"
18 
19 #include "matrix.hpp"
20 #include "tmatrix.hpp"
21 #include "tlayout.hpp"
22 #include "ttensor.hpp"
23 
24 // This header contains stand-alone functions for "small" dense linear algebra
25 // (at quadrature point or element-level) designed to be inlined directly into
26 // device kernels.
27 
28 // Many methods of the DenseMatrix class and some of the Vector class call these
29 // kernels directly on the host, see the implementations in linalg/densemat.cpp
30 // and linalg/vector.cpp.
31 
32 namespace mfem
33 {
34 
35 namespace kernels
36 {
37 
38 /// Compute the square of the Euclidean distance to another vector
39 template<int dim>
40 MFEM_HOST_DEVICE inline double DistanceSquared(const double *x, const double *y)
41 {
42  double d = 0.0;
43  for (int i = 0; i < dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
44  return d;
45 }
46 
47 /// Creates n x n diagonal matrix with diagonal elements c
48 template<int dim>
49 MFEM_HOST_DEVICE inline void Diag(const double c, double *data)
50 {
51  const int N = dim*dim;
52  for (int i = 0; i < N; i++) { data[i] = 0.0; }
53  for (int i = 0; i < dim; i++) { data[i*(dim+1)] = c; }
54 }
55 
56 /// Vector subtraction operation: z = a * (x - y)
57 template<int dim>
58 MFEM_HOST_DEVICE inline void Subtract(const double a,
59  const double *x, const double *y,
60  double *z)
61 {
62  for (int i = 0; i < dim; i++) { z[i] = a * (x[i] - y[i]); }
63 }
64 
65 /// Dense matrix operation: VWt += v w^t
66 template<int dim>
67 MFEM_HOST_DEVICE inline void AddMultVWt(const double *v, const double *w,
68  double *VWt)
69 {
70  for (int i = 0; i < dim; i++)
71  {
72  const double vi = v[i];
73  for (int j = 0; j < dim; j++) { VWt[i*dim+j] += vi * w[j]; }
74  }
75 }
76 
77 template<int H, int W, typename T>
78 MFEM_HOST_DEVICE inline
79 void FNorm(double &scale_factor, double &scaled_fnorm2, const T *data)
80 {
81  int i, hw = H * W;
82  T max_norm = 0.0, entry, fnorm2;
83 
84  for (i = 0; i < hw; i++)
85  {
86  entry = fabs(data[i]);
87  if (entry > max_norm)
88  {
89  max_norm = entry;
90  }
91  }
92 
93  if (max_norm == 0.0)
94  {
95  scale_factor = scaled_fnorm2 = 0.0;
96  return;
97  }
98 
99  fnorm2 = 0.0;
100  for (i = 0; i < hw; i++)
101  {
102  entry = data[i] / max_norm;
103  fnorm2 += entry * entry;
104  }
105 
106  scale_factor = max_norm;
107  scaled_fnorm2 = fnorm2;
108 }
109 
110 /// Compute the Frobenius norm of the matrix
111 template<int H, int W, typename T>
112 MFEM_HOST_DEVICE inline
113 double FNorm(const T *data)
114 {
115  double s, n2;
116  kernels::FNorm<H,W>(s, n2, data);
117  return s*sqrt(n2);
118 }
119 
120 /// Compute the square of the Frobenius norm of the matrix
121 template<int H, int W, typename T>
122 MFEM_HOST_DEVICE inline
123 double FNorm2(const T *data)
124 {
125  double s, n2;
126  kernels::FNorm<H,W>(s, n2, data);
127  return s*s*n2;
128 }
129 
130 /// Returns the l2 norm of the Vector with given @a size and @a data.
131 template<typename T>
132 MFEM_HOST_DEVICE inline
133 double Norml2(const int size, const T *data)
134 {
135  if (0 == size) { return 0.0; }
136  if (1 == size) { return std::abs(data[0]); }
137  T scale = 0.0;
138  T sum = 0.0;
139  for (int i = 0; i < size; i++)
140  {
141  if (data[i] != 0.0)
142  {
143  const T absdata = fabs(data[i]);
144  if (scale <= absdata)
145  {
146  const T sqr_arg = scale / absdata;
147  sum = 1.0 + sum * (sqr_arg * sqr_arg);
148  scale = absdata;
149  continue;
150  } // end if scale <= absdata
151  const T sqr_arg = absdata / scale;
152  sum += (sqr_arg * sqr_arg); // else scale > absdata
153  } // end if data[i] != 0
154  }
155  return scale * sqrt(sum);
156 }
157 
158 /** @brief Matrix vector multiplication: y = A x, where the matrix A is of size
159  @a height x @a width with given @a data, while @a x and @a y specify the
160  data of the input and output vectors. */
161 template<typename TA, typename TX, typename TY>
162 MFEM_HOST_DEVICE inline
163 void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
164 {
165  if (width == 0)
166  {
167  for (int row = 0; row < height; row++)
168  {
169  y[row] = 0.0;
170  }
171  return;
172  }
173  const TA *d_col = data;
174  TX x_col = x[0];
175  for (int row = 0; row < height; row++)
176  {
177  y[row] = x_col*d_col[row];
178  }
179  d_col += height;
180  for (int col = 1; col < width; col++)
181  {
182  x_col = x[col];
183  for (int row = 0; row < height; row++)
184  {
185  y[row] += x_col*d_col[row];
186  }
187  d_col += height;
188  }
189 }
190 
191 /** @brief Matrix transpose vector multiplication: y = At x, where the matrix A
192  is of size @a height x @a width with given @a data, while @a x and @a y
193  specify the data of the input and output vectors. */
194 template<typename TA, typename TX, typename TY>
195 MFEM_HOST_DEVICE inline
196 void MultTranspose(const int height, const int width, const TA *data,
197  const TX *x, TY *y)
198 {
199  if (height == 0)
200  {
201  for (int row = 0; row < width; row++)
202  {
203  y[row] = 0.0;
204  }
205  return;
206  }
207  TY *y_off = y;
208  for (int i = 0; i < width; ++i)
209  {
210  TY val = 0.0;
211  for (int j = 0; j < height; ++j)
212  {
213  val += x[j] * data[i * height + j];
214  }
215  *y_off = val;
216  y_off++;
217  }
218 }
219 
220 /// Symmetrize a square matrix with given @a size and @a data: A -> (A+A^T)/2.
221 template<typename T>
222 MFEM_HOST_DEVICE inline
223 void Symmetrize(const int size, T *data)
224 {
225  for (int i = 0; i < size; i++)
226  {
227  for (int j = 0; j < i; j++)
228  {
229  const T a = 0.5 * (data[i*size+j] + data[j*size+i]);
230  data[j*size+i] = data[i*size+j] = a;
231  }
232  }
233 }
234 
235 /// Compute the determinant of a square matrix of size dim with given @a data.
236 template<int dim, typename T>
237 MFEM_HOST_DEVICE inline T Det(const T *data)
238 {
239  return TDetHD<T>(ColumnMajorLayout2D<dim,dim>(), data);
240 }
241 
242 /** @brief Return the inverse of a matrix with given @a size and @a data into
243  the matrix with data @a inv_data. */
244 template<int dim, typename T>
245 MFEM_HOST_DEVICE inline
246 void CalcInverse(const T *data, T *inv_data)
247 {
248  typedef ColumnMajorLayout2D<dim,dim> layout_t;
249  const T det = TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
250  TAssignHD<AssignOp::Mult>(layout_t(), inv_data, static_cast<T>(1.0)/det);
251 }
252 
253 /** @brief Return the adjugate of a matrix */
254 template<int dim, typename T>
255 MFEM_HOST_DEVICE inline
256 void CalcAdjugate(const T *data, T *adj_data)
257 {
258  typedef ColumnMajorLayout2D<dim,dim> layout_t;
259  TAdjugateHD<T>(layout_t(), data, layout_t(), adj_data);
260 }
261 
262 /** @brief Compute C = A + alpha*B, where the matrices A, B and C are of size @a
263  height x @a width with data @a Adata, @a Bdata and @a Cdata. */
264 template<typename TALPHA, typename TA, typename TB, typename TC>
265 MFEM_HOST_DEVICE inline
266 void Add(const int height, const int width, const TALPHA alpha,
267  const TA *Adata, const TB *Bdata, TC *Cdata)
268 {
269  for (int j = 0; j < width; j++)
270  {
271  for (int i = 0; i < height; i++)
272  {
273  const int n = i*width+j;
274  Cdata[n] = Adata[n] + alpha * Bdata[n];
275  }
276  }
277 }
278 
279 /** @brief Compute C = alpha*A + beta*B, where the matrices A, B and C are of
280  size @a height x @a width with data @a Adata, @a Bdata and @a Cdata. */
281 template<typename TALPHA, typename TBETA, typename TA, typename TB, typename TC>
282 MFEM_HOST_DEVICE inline
283 void Add(const int height, const int width,
284  const TALPHA alpha, const TA *Adata,
285  const TBETA beta, const TB *Bdata,
286  TC *Cdata)
287 {
288  const int m = height * width;
289  for (int i = 0; i < m; i++)
290  {
291  Cdata[i] = alpha * Adata[i] + beta * Bdata[i];
292  }
293 }
294 
295 /** @brief Compute B += A, where the matrices A and B are of size
296  @a height x @a width with data @a Adata and @a Bdata. */
297 template<typename TA, typename TB>
298 MFEM_HOST_DEVICE inline
299 void Add(const int height, const int width, const TA *Adata, TB *Bdata)
300 {
301  const int m = height * width;
302  for (int i = 0; i < m; i++)
303  {
304  Bdata[i] += Adata[i];
305  }
306 }
307 
308 /** @brief Compute B +=alpha*A, where the matrices A and B are of size
309  @a height x @a width with data @a Adata and @a Bdata. */
310 template<typename TA, typename TB>
311 MFEM_HOST_DEVICE inline
312 void Add(const int height, const int width,
313  const double alpha, const TA *Adata, TB *Bdata)
314 {
315  const int m = height * width;
316  for (int i = 0; i < m; i++)
317  {
318  Bdata[i] += alpha * Adata[i];
319  }
320 }
321 
322 /** @brief Compute B = alpha*A, where the matrices A and B are of size
323  @a height x @a width with data @a Adata and @a Bdata. */
324 template<typename TA, typename TB>
325 MFEM_HOST_DEVICE inline
326 void Set(const int height, const int width,
327  const double alpha, const TA *Adata, TB *Bdata)
328 {
329  const int m = height * width;
330  for (int i = 0; i < m; i++)
331  {
332  Bdata[i] = alpha * Adata[i];
333  }
334 }
335 
336 /** @brief Matrix-matrix multiplication: A = B * C, where the matrices A, B and
337  C are of sizes @a Aheight x @a Awidth, @a Aheight x @a Bwidth and @a Bwidth
338  x @a Awidth, respectively. */
339 template<typename TA, typename TB, typename TC>
340 MFEM_HOST_DEVICE inline
341 void Mult(const int Aheight, const int Awidth, const int Bwidth,
342  const TB *Bdata, const TC *Cdata, TA *Adata)
343 {
344  const int ah_x_aw = Aheight * Awidth;
345  for (int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
346  for (int j = 0; j < Awidth; j++)
347  {
348  for (int k = 0; k < Bwidth; k++)
349  {
350  for (int i = 0; i < Aheight; i++)
351  {
352  Adata[i+j*Aheight] += Bdata[i+k*Aheight] * Cdata[k+j*Bwidth];
353  }
354  }
355  }
356 }
357 
358 /** @brief Multiply a matrix of size @a Aheight x @a Awidth and data @a Adata
359  with the transpose of a matrix of size @a Bheight x @a Awidth and data @a
360  Bdata: A * Bt. Return the result in a matrix with data @a ABtdata. */
361 template<typename TA, typename TB, typename TC>
362 MFEM_HOST_DEVICE inline
363 void MultABt(const int Aheight, const int Awidth, const int Bheight,
364  const TA *Adata, const TB *Bdata, TC *ABtdata)
365 {
366  const int ah_x_bh = Aheight * Bheight;
367  for (int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
368  for (int k = 0; k < Awidth; k++)
369  {
370  TC *c = ABtdata;
371  for (int j = 0; j < Bheight; j++)
372  {
373  const double bjk = Bdata[j];
374  for (int i = 0; i < Aheight; i++)
375  {
376  c[i] += Adata[i] * bjk;
377  }
378  c += Aheight;
379  }
380  Adata += Aheight;
381  Bdata += Bheight;
382  }
383 }
384 
385 /** @brief Multiply the transpose of a matrix of size @a Aheight x @a Awidth
386  and data @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a
387  Bdata: At * B. Return the result in a matrix with data @a AtBdata. */
388 template<typename TA, typename TB, typename TC>
389 MFEM_HOST_DEVICE inline
390 void MultAtB(const int Aheight, const int Awidth, const int Bwidth,
391  const TA *Adata, const TB *Bdata, TC *AtBdata)
392 {
393  TC *c = AtBdata;
394  for (int i = 0; i < Bwidth; ++i)
395  {
396  for (int j = 0; j < Awidth; ++j)
397  {
398  TC val = 0.0;
399  for (int k = 0; k < Aheight; ++k)
400  {
401  val += Adata[j * Aheight + k] * Bdata[i * Aheight + k];
402  }
403  *c = val;
404  c++;
405  }
406  }
407 }
408 
409 /// Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
410 template<int HEIGHT, int WIDTH> MFEM_HOST_DEVICE
411 void CalcLeftInverse(const double *data, double *left_inv);
412 
413 /// Compute the spectrum of the matrix of size dim with given @a data, returning
414 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
415 /// vec (listed consecutively).
416 template<int dim> MFEM_HOST_DEVICE
417 void CalcEigenvalues(const double *data, double *lambda, double *vec);
418 
419 /// Return the i'th singular value of the matrix of size dim with given @a data.
420 template<int dim> MFEM_HOST_DEVICE
421 double CalcSingularvalue(const double *data, const int i);
422 
423 
424 // Utility functions for CalcEigenvalues and CalcSingularvalue
425 namespace internal
426 {
427 
428 /// Utility function to swap the values of @a a and @a b.
429 template<typename T>
430 MFEM_HOST_DEVICE static inline
431 void Swap(T &a, T &b)
432 {
433  T tmp = a;
434  a = b;
435  b = tmp;
436 }
437 
438 const double Epsilon = std::numeric_limits<double>::epsilon();
439 
440 /// Utility function used in CalcSingularvalue<3>.
441 MFEM_HOST_DEVICE static inline
442 void Eigenvalues2S(const double &d12, double &d1, double &d2)
443 {
444  const double sqrt_1_eps = sqrt(1./Epsilon);
445  if (d12 != 0.)
446  {
447  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
448  double t;
449  const double zeta = (d2 - d1)/(2*d12); // inf/inf from overflows?
450  if (fabs(zeta) < sqrt_1_eps)
451  {
452  t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
453  }
454  else
455  {
456  t = d12*copysign(0.5/fabs(zeta), zeta);
457  }
458  d1 -= t;
459  d2 += t;
460  }
461 }
462 
463 /// Utility function used in CalcEigenvalues().
464 MFEM_HOST_DEVICE static inline
465 void Eigensystem2S(const double &d12, double &d1, double &d2,
466  double &c, double &s)
467 {
468  const double sqrt_1_eps = sqrt(1./Epsilon);
469  if (d12 == 0.0)
470  {
471  c = 1.;
472  s = 0.;
473  }
474  else
475  {
476  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
477  double t;
478  const double zeta = (d2 - d1)/(2*d12);
479  const double azeta = fabs(zeta);
480  if (azeta < sqrt_1_eps)
481  {
482  t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
483  }
484  else
485  {
486  t = copysign(0.5/azeta, zeta);
487  }
488  c = sqrt(1./(1. + t*t));
489  s = c*t;
490  t *= d12;
491  d1 -= t;
492  d2 += t;
493  }
494 }
495 
496 
497 /// Utility function used in CalcEigenvalues<3>.
498 MFEM_HOST_DEVICE static inline
499 void GetScalingFactor(const double &d_max, double &mult)
500 {
501  int d_exp;
502  if (d_max > 0.)
503  {
504  mult = frexp(d_max, &d_exp);
505  if (d_exp == std::numeric_limits<double>::max_exponent)
506  {
507  mult *= std::numeric_limits<double>::radix;
508  }
509  mult = d_max/mult;
510  }
511  else
512  {
513  mult = 1.;
514  }
515  // mult = 2^d_exp is such that d_max/mult is in [0.5,1) or in other words
516  // d_max is in the interval [0.5,1)*mult
517 }
518 
519 /// Utility function used in CalcEigenvalues<3>.
520 MFEM_HOST_DEVICE static inline
521 bool KernelVector2G(const int &mode,
522  double &d1, double &d12, double &d21, double &d2)
523 {
524  // Find a vector (z1,z2) in the "near"-kernel of the matrix
525  // | d1 d12 |
526  // | d21 d2 |
527  // using QR factorization.
528  // The vector (z1,z2) is returned in (d1,d2). Return 'true' if the matrix
529  // is zero without setting (d1,d2).
530  // Note: in the current implementation |z1| + |z2| = 1.
531 
532  // l1-norms of the columns
533  double n1 = fabs(d1) + fabs(d21);
534  double n2 = fabs(d2) + fabs(d12);
535 
536  bool swap_columns = (n2 > n1);
537  double mu;
538 
539  if (!swap_columns)
540  {
541  if (n1 == 0.)
542  {
543  return true;
544  }
545 
546  if (mode == 0) // eliminate the larger entry in the column
547  {
548  if (fabs(d1) > fabs(d21))
549  {
550  Swap(d1, d21);
551  Swap(d12, d2);
552  }
553  }
554  else // eliminate the smaller entry in the column
555  {
556  if (fabs(d1) < fabs(d21))
557  {
558  Swap(d1, d21);
559  Swap(d12, d2);
560  }
561  }
562  }
563  else
564  {
565  // n2 > n1, swap columns 1 and 2
566  if (mode == 0) // eliminate the larger entry in the column
567  {
568  if (fabs(d12) > fabs(d2))
569  {
570  Swap(d1, d2);
571  Swap(d12, d21);
572  }
573  else
574  {
575  Swap(d1, d12);
576  Swap(d21, d2);
577  }
578  }
579  else // eliminate the smaller entry in the column
580  {
581  if (fabs(d12) < fabs(d2))
582  {
583  Swap(d1, d2);
584  Swap(d12, d21);
585  }
586  else
587  {
588  Swap(d1, d12);
589  Swap(d21, d2);
590  }
591  }
592  }
593 
594  n1 = hypot(d1, d21);
595 
596  if (d21 != 0.)
597  {
598  // v = (n1, n2)^t, |v| = 1
599  // Q = I - 2 v v^t, Q (d1, d21)^t = (mu, 0)^t
600  mu = copysign(n1, d1);
601  n1 = -d21*(d21/(d1 + mu)); // = d1 - mu
602  d1 = mu;
603  // normalize (n1,d21) to avoid overflow/underflow
604  // normalize (n1,d21) by the max-norm to avoid the sqrt call
605  if (fabs(n1) <= fabs(d21))
606  {
607  // (n1,n2) <-- (n1/d21,1)
608  n1 = n1/d21;
609  mu = (2./(1. + n1*n1))*(n1*d12 + d2);
610  d2 = d2 - mu;
611  d12 = d12 - mu*n1;
612  }
613  else
614  {
615  // (n1,n2) <-- (1,d21/n1)
616  n2 = d21/n1;
617  mu = (2./(1. + n2*n2))*(d12 + n2*d2);
618  d2 = d2 - mu*n2;
619  d12 = d12 - mu;
620  }
621  }
622 
623  // Solve:
624  // | d1 d12 | | z1 | = | 0 |
625  // | 0 d2 | | z2 | | 0 |
626 
627  // choose (z1,z2) to minimize |d1*z1 + d12*z2| + |d2*z2|
628  // under the condition |z1| + |z2| = 1, z2 >= 0 (for uniqueness)
629  // set t = z1, z2 = 1 - |t|, -1 <= t <= 1
630  // objective function is:
631  // |d1*t + d12*(1 - |t|)| + |d2|*(1 - |t|) -- piecewise linear with
632  // possible minima are -1,0,1,t1 where t1: d1*t1 + d12*(1 - |t1|) = 0
633  // values: @t=+/-1 -> |d1|, @t=0 -> |n1| + |d2|, @t=t1 -> |d2|*(1 - |t1|)
634 
635  // evaluate z2 @t=t1
636  mu = -d12/d1;
637  // note: |mu| <= 1, if using l2-norm for column pivoting
638  // |mu| <= sqrt(2), if using l1-norm
639  n2 = 1./(1. + fabs(mu));
640  // check if |d1|<=|d2|*z2
641  if (fabs(d1) <= n2*fabs(d2))
642  {
643  d2 = 0.;
644  d1 = 1.;
645  }
646  else
647  {
648  d2 = n2;
649  // d1 = (n2 < 0.5) ? copysign(1. - n2, mu) : mu*n2;
650  d1 = mu*n2;
651  }
652 
653  if (swap_columns)
654  {
655  Swap(d1, d2);
656  }
657 
658  return false;
659 }
660 
661 /// Utility function used in CalcEigenvalues<3>.
662 MFEM_HOST_DEVICE static inline
663 void Vec_normalize3_aux(const double &x1, const double &x2,
664  const double &x3,
665  double &n1, double &n2, double &n3)
666 {
667  double t, r;
668 
669  const double m = fabs(x1);
670  r = x2/m;
671  t = 1. + r*r;
672  r = x3/m;
673  t = sqrt(1./(t + r*r));
674  n1 = copysign(t, x1);
675  t /= m;
676  n2 = x2*t;
677  n3 = x3*t;
678 }
679 
680 /// Utility function used in CalcEigenvalues<3>.
681 MFEM_HOST_DEVICE static inline
682 void Vec_normalize3(const double &x1, const double &x2, const double &x3,
683  double &n1, double &n2, double &n3)
684 {
685  // should work ok when xk is the same as nk for some or all k
686  if (fabs(x1) >= fabs(x2))
687  {
688  if (fabs(x1) >= fabs(x3))
689  {
690  if (x1 != 0.)
691  {
692  Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
693  }
694  else
695  {
696  n1 = n2 = n3 = 0.;
697  }
698  return;
699  }
700  }
701  else if (fabs(x2) >= fabs(x3))
702  {
703  Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
704  return;
705  }
706  Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
707 }
708 
709 /// Utility function used in CalcEigenvalues<3>.
710 MFEM_HOST_DEVICE static inline
711 int KernelVector3G_aux(const int &mode,
712  double &d1, double &d2, double &d3,
713  double &c12, double &c13, double &c23,
714  double &c21, double &c31, double &c32)
715 {
716  int kdim;
717  double mu, n1, n2, n3, s1, s2, s3;
718 
719  s1 = hypot(c21, c31);
720  n1 = hypot(d1, s1);
721 
722  if (s1 != 0.)
723  {
724  // v = (s1, s2, s3)^t, |v| = 1
725  // Q = I - 2 v v^t, Q (d1, c12, c13)^t = (mu, 0, 0)^t
726  mu = copysign(n1, d1);
727  n1 = -s1*(s1/(d1 + mu)); // = d1 - mu
728  d1 = mu;
729 
730  // normalize (n1,c21,c31) to avoid overflow/underflow
731  // normalize (n1,c21,c31) by the max-norm to avoid the sqrt call
732  if (fabs(n1) >= fabs(c21))
733  {
734  if (fabs(n1) >= fabs(c31))
735  {
736  // n1 is max, (s1,s2,s3) <-- (1,c21/n1,c31/n1)
737  s2 = c21/n1;
738  s3 = c31/n1;
739  mu = 2./(1. + s2*s2 + s3*s3);
740  n2 = mu*(c12 + s2*d2 + s3*c32);
741  n3 = mu*(c13 + s2*c23 + s3*d3);
742  c12 = c12 - n2;
743  d2 = d2 - s2*n2;
744  c32 = c32 - s3*n2;
745  c13 = c13 - n3;
746  c23 = c23 - s2*n3;
747  d3 = d3 - s3*n3;
748  goto done_column_1;
749  }
750  }
751  else if (fabs(c21) >= fabs(c31))
752  {
753  // c21 is max, (s1,s2,s3) <-- (n1/c21,1,c31/c21)
754  s1 = n1/c21;
755  s3 = c31/c21;
756  mu = 2./(1. + s1*s1 + s3*s3);
757  n2 = mu*(s1*c12 + d2 + s3*c32);
758  n3 = mu*(s1*c13 + c23 + s3*d3);
759  c12 = c12 - s1*n2;
760  d2 = d2 - n2;
761  c32 = c32 - s3*n2;
762  c13 = c13 - s1*n3;
763  c23 = c23 - n3;
764  d3 = d3 - s3*n3;
765  goto done_column_1;
766  }
767  // c31 is max, (s1,s2,s3) <-- (n1/c31,c21/c31,1)
768  s1 = n1/c31;
769  s2 = c21/c31;
770  mu = 2./(1. + s1*s1 + s2*s2);
771  n2 = mu*(s1*c12 + s2*d2 + c32);
772  n3 = mu*(s1*c13 + s2*c23 + d3);
773  c12 = c12 - s1*n2;
774  d2 = d2 - s2*n2;
775  c32 = c32 - n2;
776  c13 = c13 - s1*n3;
777  c23 = c23 - s2*n3;
778  d3 = d3 - n3;
779  }
780 
781 done_column_1:
782 
783  // Solve:
784  // | d2 c23 | | z2 | = | 0 |
785  // | c32 d3 | | z3 | | 0 |
786  if (KernelVector2G(mode, d2, c23, c32, d3))
787  {
788  // Have two solutions:
789  // two vectors in the kernel are P (-c12/d1, 1, 0)^t and
790  // P (-c13/d1, 0, 1)^t where P is the permutation matrix swapping
791  // entries 1 and col.
792 
793  // A vector orthogonal to both these vectors is P (1, c12/d1, c13/d1)^t
794  d2 = c12/d1;
795  d3 = c13/d1;
796  d1 = 1.;
797  kdim = 2;
798  }
799  else
800  {
801  // solve for z1:
802  // note: |z1| <= a since |z2| + |z3| = 1, and
803  // max{|c12|,|c13|} <= max{norm(col. 2),norm(col. 3)}
804  // <= norm(col. 1) <= a |d1|
805  // a = 1, if using l2-norm for column pivoting
806  // a = sqrt(3), if using l1-norm
807  d1 = -(c12*d2 + c13*d3)/d1;
808  kdim = 1;
809  }
810 
811  Vec_normalize3(d1, d2, d3, d1, d2, d3);
812 
813  return kdim;
814 }
815 
816 /// Utility function used in CalcEigenvalues<3>.
817 MFEM_HOST_DEVICE static inline
818 int KernelVector3S(const int &mode, const double &d12,
819  const double &d13, const double &d23,
820  double &d1, double &d2, double &d3)
821 {
822  // Find a unit vector (z1,z2,z3) in the "near"-kernel of the matrix
823  // | d1 d12 d13 |
824  // | d12 d2 d23 |
825  // | d13 d23 d3 |
826  // using QR factorization.
827  // The vector (z1,z2,z3) is returned in (d1,d2,d3).
828  // Returns the dimension of the kernel, kdim, but never zero.
829  // - if kdim == 3, then (d1,d2,d3) is not defined,
830  // - if kdim == 2, then (d1,d2,d3) is a vector orthogonal to the kernel,
831  // - otherwise kdim == 1 and (d1,d2,d3) is a vector in the "near"-kernel.
832 
833  double c12 = d12, c13 = d13, c23 = d23;
834  double c21, c31, c32;
835  int col, row;
836 
837  // l1-norms of the columns:
838  c32 = fabs(d1) + fabs(c12) + fabs(c13);
839  c31 = fabs(d2) + fabs(c12) + fabs(c23);
840  c21 = fabs(d3) + fabs(c13) + fabs(c23);
841 
842  // column pivoting: choose the column with the largest norm
843  if (c32 >= c21)
844  {
845  col = (c32 >= c31) ? 1 : 2;
846  }
847  else
848  {
849  col = (c31 >= c21) ? 2 : 3;
850  }
851  switch (col)
852  {
853  case 1:
854  if (c32 == 0.) // zero matrix
855  {
856  return 3;
857  }
858  break;
859 
860  case 2:
861  if (c31 == 0.) // zero matrix
862  {
863  return 3;
864  }
865  Swap(c13, c23);
866  Swap(d1, d2);
867  break;
868 
869  case 3:
870  if (c21 == 0.) // zero matrix
871  {
872  return 3;
873  }
874  Swap(c12, c23);
875  Swap(d1, d3);
876  }
877 
878  // row pivoting depending on 'mode'
879  if (mode == 0)
880  {
881  if (fabs(d1) <= fabs(c13))
882  {
883  row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
884  }
885  else
886  {
887  row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
888  }
889  }
890  else
891  {
892  if (fabs(d1) >= fabs(c13))
893  {
894  row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
895  }
896  else
897  {
898  row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
899  }
900  }
901  switch (row)
902  {
903  case 1:
904  c21 = c12;
905  c31 = c13;
906  c32 = c23;
907  break;
908 
909  case 2:
910  c21 = d1;
911  c31 = c13;
912  c32 = c23;
913  d1 = c12;
914  c12 = d2;
915  d2 = d1;
916  c13 = c23;
917  c23 = c31;
918  break;
919 
920  case 3:
921  c21 = c12;
922  c31 = d1;
923  c32 = c12;
924  d1 = c13;
925  c12 = c23;
926  c13 = d3;
927  d3 = d1;
928  }
929  row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
930  // row is kdim
931 
932  switch (col)
933  {
934  case 2:
935  Swap(d1, d2);
936  break;
937 
938  case 3:
939  Swap(d1, d3);
940  }
941  return row;
942 }
943 
944 /// Utility function used in CalcEigenvalues<3>.
945 MFEM_HOST_DEVICE static inline
946 int Reduce3S(const int &mode,
947  double &d1, double &d2, double &d3,
948  double &d12, double &d13, double &d23,
949  double &z1, double &z2, double &z3,
950  double &v1, double &v2, double &v3,
951  double &g)
952 {
953  // Given the matrix
954  // | d1 d12 d13 |
955  // A = | d12 d2 d23 |
956  // | d13 d23 d3 |
957  // and a unit eigenvector z=(z1,z2,z3), transform the matrix A into the
958  // matrix B = Q P A P Q that has the form
959  // | b1 0 0 |
960  // B = Q P A P Q = | 0 b2 b23 |
961  // | 0 b23 b3 |
962  // where P is the permutation matrix switching entries 1 and k, and
963  // Q is the reflection matrix Q = I - g v v^t, defined by: set y = P z and
964  // v = c(y - e_1); if y = e_1, then v = 0 and Q = I.
965  // Note: Q y = e_1, Q e_1 = y ==> Q P A P Q e_1 = ... = lambda e_1.
966  // The entries (b1,b2,b3,b23) are returned in (d1,d2,d3,d23), and the
967  // return value of the function is k. The variable g = 2/(v1^2+v2^2+v3^3).
968 
969  int k;
970  double s, w1, w2, w3;
971 
972  if (mode == 0)
973  {
974  // choose k such that z^t e_k = zk has the smallest absolute value, i.e.
975  // the angle between z and e_k is closest to pi/2
976  if (fabs(z1) <= fabs(z3))
977  {
978  k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
979  }
980  else
981  {
982  k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
983  }
984  }
985  else
986  {
987  // choose k such that zk is the largest by absolute value
988  if (fabs(z1) >= fabs(z3))
989  {
990  k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
991  }
992  else
993  {
994  k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
995  }
996  }
997  switch (k)
998  {
999  case 2:
1000  Swap(d13, d23);
1001  Swap(d1, d2);
1002  Swap(z1, z2);
1003  break;
1004 
1005  case 3:
1006  Swap(d12, d23);
1007  Swap(d1, d3);
1008  Swap(z1, z3);
1009  }
1010 
1011  s = hypot(z2, z3);
1012 
1013  if (s == 0.)
1014  {
1015  // s can not be zero, if zk is the smallest (mode == 0)
1016  v1 = v2 = v3 = 0.;
1017  g = 1.;
1018  }
1019  else
1020  {
1021  g = copysign(1., z1);
1022  v1 = -s*(s/(z1 + g)); // = z1 - g
1023  // normalize (v1,z2,z3) by its max-norm, avoiding the sqrt call
1024  g = fabs(v1);
1025  if (fabs(z2) > g) { g = fabs(z2); }
1026  if (fabs(z3) > g) { g = fabs(z3); }
1027  v1 = v1/g;
1028  v2 = z2/g;
1029  v3 = z3/g;
1030  g = 2./(v1*v1 + v2*v2 + v3*v3);
1031 
1032  // Compute Q A Q = A - v w^t - w v^t, where
1033  // w = u - (g/2)(v^t u) v, and u = g A v
1034  // set w = g A v
1035  w1 = g*( d1*v1 + d12*v2 + d13*v3);
1036  w2 = g*(d12*v1 + d2*v2 + d23*v3);
1037  w3 = g*(d13*v1 + d23*v2 + d3*v3);
1038  // w := w - (g/2)(v^t w) v
1039  s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
1040  w1 -= s*v1;
1041  w2 -= s*v2;
1042  w3 -= s*v3;
1043  // dij -= vi*wj + wi*vj
1044  d1 -= 2*v1*w1;
1045  d2 -= 2*v2*w2;
1046  d23 -= v2*w3 + v3*w2;
1047  d3 -= 2*v3*w3;
1048  // compute the off-diagonal entries on the first row/column of B which
1049  // should be zero (for debugging):
1050 #if 0
1051  s = d12 - v1*w2 - v2*w1; // b12 = 0
1052  s = d13 - v1*w3 - v3*w1; // b13 = 0
1053 #endif
1054  }
1055 
1056  switch (k)
1057  {
1058  case 2:
1059  Swap(z1, z2);
1060  break;
1061  case 3:
1062  Swap(z1, z3);
1063  }
1064  return k;
1065 }
1066 
1067 } // namespace kernels::internal
1068 
1069 // Implementations of CalcLeftInverse for dim = 1, 2.
1070 
1071 template<> MFEM_HOST_DEVICE inline
1072 void CalcLeftInverse<2,1>(const double *d, double *left_inv)
1073 {
1074  const double t = 1.0 / (d[0]*d[0] + d[1]*d[1]);
1075  left_inv[0] = d[0] * t;
1076  left_inv[1] = d[1] * t;
1077 }
1078 
1079 template<> MFEM_HOST_DEVICE inline
1080 void CalcLeftInverse<3,1>(const double *d, double *left_inv)
1081 {
1082  const double t = 1.0 / (d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
1083  left_inv[0] = d[0] * t;
1084  left_inv[1] = d[1] * t;
1085  left_inv[2] = d[2] * t;
1086 }
1087 
1088 template<> MFEM_HOST_DEVICE inline
1089 void CalcLeftInverse<3,2>(const double *d, double *left_inv)
1090 {
1091  double e = d[0]*d[0] + d[1]*d[1] + d[2]*d[2];
1092  double g = d[3]*d[3] + d[4]*d[4] + d[5]*d[5];
1093  double f = d[0]*d[3] + d[1]*d[4] + d[2]*d[5];
1094  const double t = 1.0 / (e*g - f*f);
1095  e *= t; g *= t; f *= t;
1096 
1097  left_inv[0] = d[0]*g - d[3]*f;
1098  left_inv[1] = d[3]*e - d[0]*f;
1099  left_inv[2] = d[1]*g - d[4]*f;
1100  left_inv[3] = d[4]*e - d[1]*f;
1101  left_inv[4] = d[2]*g - d[5]*f;
1102  left_inv[5] = d[5]*e - d[2]*f;
1103 }
1104 
1105 // Implementations of CalcEigenvalues and CalcSingularvalue for dim = 2, 3.
1106 
1107 /// Compute the spectrum of the matrix of size 2 with given @a data, returning
1108 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1109 /// vec (listed consecutively).
1110 template<> MFEM_HOST_DEVICE inline
1111 void CalcEigenvalues<2>(const double *data, double *lambda, double *vec)
1112 {
1113  double d0 = data[0];
1114  double d2 = data[2]; // use the upper triangular entry
1115  double d3 = data[3];
1116  double c, s;
1117  internal::Eigensystem2S(d2, d0, d3, c, s);
1118  if (d0 <= d3)
1119  {
1120  lambda[0] = d0;
1121  lambda[1] = d3;
1122  vec[0] = c;
1123  vec[1] = -s;
1124  vec[2] = s;
1125  vec[3] = c;
1126  }
1127  else
1128  {
1129  lambda[0] = d3;
1130  lambda[1] = d0;
1131  vec[0] = s;
1132  vec[1] = c;
1133  vec[2] = c;
1134  vec[3] = -s;
1135  }
1136 }
1137 
1138 /// Compute the spectrum of the matrix of size 3 with given @a data, returning
1139 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1140 /// vec (listed consecutively).
1141 template<> MFEM_HOST_DEVICE inline
1142 void CalcEigenvalues<3>(const double *data, double *lambda, double *vec)
1143 {
1144  double d11 = data[0];
1145  double d12 = data[3]; // use the upper triangular entries
1146  double d22 = data[4];
1147  double d13 = data[6];
1148  double d23 = data[7];
1149  double d33 = data[8];
1150 
1151  double mult;
1152  {
1153  double d_max = fabs(d11);
1154  if (d_max < fabs(d22)) { d_max = fabs(d22); }
1155  if (d_max < fabs(d33)) { d_max = fabs(d33); }
1156  if (d_max < fabs(d12)) { d_max = fabs(d12); }
1157  if (d_max < fabs(d13)) { d_max = fabs(d13); }
1158  if (d_max < fabs(d23)) { d_max = fabs(d23); }
1159 
1160  internal::GetScalingFactor(d_max, mult);
1161  }
1162 
1163  d11 /= mult; d22 /= mult; d33 /= mult;
1164  d12 /= mult; d13 /= mult; d23 /= mult;
1165 
1166  double aa = (d11 + d22 + d33)/3; // aa = tr(A)/3
1167  double c1 = d11 - aa;
1168  double c2 = d22 - aa;
1169  double c3 = d33 - aa;
1170 
1171  double Q, R;
1172 
1173  Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1174  R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1175 
1176  if (Q <= 0.)
1177  {
1178  lambda[0] = lambda[1] = lambda[2] = aa;
1179  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1180  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1181  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1182  }
1183  else
1184  {
1185  double sqrtQ = sqrt(Q);
1186  double sqrtQ3 = Q*sqrtQ;
1187  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1188  // double sqrtQ3 = pow(Q, 1.5);
1189  double r;
1190  if (fabs(R) >= sqrtQ3)
1191  {
1192  if (R < 0.)
1193  {
1194  // R = -1.;
1195  r = 2*sqrtQ;
1196  }
1197  else
1198  {
1199  // R = 1.;
1200  r = -2*sqrtQ;
1201  }
1202  }
1203  else
1204  {
1205  R = R/sqrtQ3;
1206 
1207  if (R < 0.)
1208  {
1209  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1210  }
1211  else
1212  {
1213  r = -2*sqrtQ*cos(acos(R)/3); // min
1214  }
1215  }
1216 
1217  aa += r;
1218  c1 = d11 - aa;
1219  c2 = d22 - aa;
1220  c3 = d33 - aa;
1221 
1222  // Type of Householder reflections: z --> mu ek, where k is the index
1223  // of the entry in z with:
1224  // mode == 0: smallest absolute value --> angle closest to pi/2
1225  // mode == 1: largest absolute value --> angle farthest from pi/2
1226  // Observations:
1227  // mode == 0 produces better eigenvectors, less accurate eigenvalues?
1228  // mode == 1 produces better eigenvalues, less accurate eigenvectors?
1229  const int mode = 0;
1230 
1231  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1232  // | c1 d12 d13 |
1233  // | d12 c2 d23 | = A - aa*I
1234  // | d13 d23 c3 |
1235  // This vector is also an eigenvector for A corresponding to aa.
1236  // The vector z overwrites (c1,c2,c3).
1237  switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1238  {
1239  case 3:
1240  // 'aa' is a triple eigenvalue
1241  lambda[0] = lambda[1] = lambda[2] = aa;
1242  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1243  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1244  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1245  goto done_3d;
1246 
1247  case 2:
1248  // ok, continue with the returned vector orthogonal to the kernel
1249  case 1:
1250  // ok, continue with the returned vector in the "near"-kernel
1251  ;
1252  }
1253 
1254  // Using the eigenvector c=(c1,c2,c3) transform A into
1255  // | d11 0 0 |
1256  // A <-- Q P A P Q = | 0 d22 d23 |
1257  // | 0 d23 d33 |
1258  double v1, v2, v3, g;
1259  int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1260  c1, c2, c3, v1, v2, v3, g);
1261  // Q = I - 2 v v^t
1262  // P - permutation matrix switching entries 1 and k
1263 
1264  // find the eigenvalues and eigenvectors for
1265  // | d22 d23 |
1266  // | d23 d33 |
1267  double c, s;
1268  internal::Eigensystem2S(d23, d22, d33, c, s);
1269  // d22 <-> P Q (0, c, -s), d33 <-> P Q (0, s, c)
1270 
1271  double *vec_1, *vec_2, *vec_3;
1272  if (d11 <= d22)
1273  {
1274  if (d22 <= d33)
1275  {
1276  lambda[0] = d11; vec_1 = vec;
1277  lambda[1] = d22; vec_2 = vec + 3;
1278  lambda[2] = d33; vec_3 = vec + 6;
1279  }
1280  else if (d11 <= d33)
1281  {
1282  lambda[0] = d11; vec_1 = vec;
1283  lambda[1] = d33; vec_3 = vec + 3;
1284  lambda[2] = d22; vec_2 = vec + 6;
1285  }
1286  else
1287  {
1288  lambda[0] = d33; vec_3 = vec;
1289  lambda[1] = d11; vec_1 = vec + 3;
1290  lambda[2] = d22; vec_2 = vec + 6;
1291  }
1292  }
1293  else
1294  {
1295  if (d11 <= d33)
1296  {
1297  lambda[0] = d22; vec_2 = vec;
1298  lambda[1] = d11; vec_1 = vec + 3;
1299  lambda[2] = d33; vec_3 = vec + 6;
1300  }
1301  else if (d22 <= d33)
1302  {
1303  lambda[0] = d22; vec_2 = vec;
1304  lambda[1] = d33; vec_3 = vec + 3;
1305  lambda[2] = d11; vec_1 = vec + 6;
1306  }
1307  else
1308  {
1309  lambda[0] = d33; vec_3 = vec;
1310  lambda[1] = d22; vec_2 = vec + 3;
1311  lambda[2] = d11; vec_1 = vec + 6;
1312  }
1313  }
1314 
1315  vec_1[0] = c1;
1316  vec_1[1] = c2;
1317  vec_1[2] = c3;
1318  d22 = g*(v2*c - v3*s);
1319  d33 = g*(v2*s + v3*c);
1320  vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1321  vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1322  vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1323  switch (k)
1324  {
1325  case 2:
1326  internal::Swap(vec_2[0], vec_2[1]);
1327  internal::Swap(vec_3[0], vec_3[1]);
1328  break;
1329 
1330  case 3:
1331  internal::Swap(vec_2[0], vec_2[2]);
1332  internal::Swap(vec_3[0], vec_3[2]);
1333  }
1334  }
1335 
1336 done_3d:
1337  lambda[0] *= mult;
1338  lambda[1] *= mult;
1339  lambda[2] *= mult;
1340 }
1341 
1342 /// Return the i'th singular value of the matrix of size 2 with given @a data.
1343 template<> MFEM_HOST_DEVICE inline
1344 double CalcSingularvalue<2>(const double *data, const int i)
1345 {
1346  double d0, d1, d2, d3;
1347  d0 = data[0];
1348  d1 = data[1];
1349  d2 = data[2];
1350  d3 = data[3];
1351  double mult;
1352 
1353  {
1354  double d_max = fabs(d0);
1355  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1356  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1357  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1358  internal::GetScalingFactor(d_max, mult);
1359  }
1360 
1361  d0 /= mult;
1362  d1 /= mult;
1363  d2 /= mult;
1364  d3 /= mult;
1365 
1366  double t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1367  double s = d0*d2 + d1*d3;
1368  s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1369 
1370  if (s == 0.0)
1371  {
1372  return 0.0;
1373  }
1374  t = fabs(d0*d3 - d1*d2) / s;
1375  if (t > s)
1376  {
1377  if (i == 0)
1378  {
1379  return t*mult;
1380  }
1381  return s*mult;
1382  }
1383  if (i == 0)
1384  {
1385  return s*mult;
1386  }
1387  return t*mult;
1388 }
1389 
1390 /// Return the i'th singular value of the matrix of size 3 with given @a data.
1391 template<> MFEM_HOST_DEVICE inline
1392 double CalcSingularvalue<3>(const double *data, const int i)
1393 {
1394  double d0, d1, d2, d3, d4, d5, d6, d7, d8;
1395  d0 = data[0]; d3 = data[3]; d6 = data[6];
1396  d1 = data[1]; d4 = data[4]; d7 = data[7];
1397  d2 = data[2]; d5 = data[5]; d8 = data[8];
1398  double mult;
1399  {
1400  double d_max = fabs(d0);
1401  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1402  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1403  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1404  if (d_max < fabs(d4)) { d_max = fabs(d4); }
1405  if (d_max < fabs(d5)) { d_max = fabs(d5); }
1406  if (d_max < fabs(d6)) { d_max = fabs(d6); }
1407  if (d_max < fabs(d7)) { d_max = fabs(d7); }
1408  if (d_max < fabs(d8)) { d_max = fabs(d8); }
1409  internal::GetScalingFactor(d_max, mult);
1410  }
1411 
1412  d0 /= mult; d1 /= mult; d2 /= mult;
1413  d3 /= mult; d4 /= mult; d5 /= mult;
1414  d6 /= mult; d7 /= mult; d8 /= mult;
1415 
1416  double b11 = d0*d0 + d1*d1 + d2*d2;
1417  double b12 = d0*d3 + d1*d4 + d2*d5;
1418  double b13 = d0*d6 + d1*d7 + d2*d8;
1419  double b22 = d3*d3 + d4*d4 + d5*d5;
1420  double b23 = d3*d6 + d4*d7 + d5*d8;
1421  double b33 = d6*d6 + d7*d7 + d8*d8;
1422 
1423  // double a, b, c;
1424  // a = -(b11 + b22 + b33);
1425  // b = b11*(b22 + b33) + b22*b33 - b12*b12 - b13*b13 - b23*b23;
1426  // c = b11*(b23*b23 - b22*b33) + b12*(b12*b33 - 2*b13*b23) + b13*b13*b22;
1427 
1428  // double Q = (a * a - 3 * b) / 9;
1429  // double Q = (b12*b12 + b13*b13 + b23*b23 +
1430  // ((b11 - b22)*(b11 - b22) +
1431  // (b11 - b33)*(b11 - b33) +
1432  // (b22 - b33)*(b22 - b33))/6)/3;
1433  // Q = (3*(b12^2 + b13^2 + b23^2) +
1434  // ((b11 - b22)^2 + (b11 - b33)^2 + (b22 - b33)^2)/2)/9
1435  // or
1436  // Q = (1/6)*|B-tr(B)/3|_F^2
1437  // Q >= 0 and
1438  // Q = 0 <==> B = scalar * I
1439  // double R = (2 * a * a * a - 9 * a * b + 27 * c) / 54;
1440  double aa = (b11 + b22 + b33)/3; // aa = tr(B)/3
1441  double c1, c2, c3;
1442  // c1 = b11 - aa; // ((b11 - b22) + (b11 - b33))/3
1443  // c2 = b22 - aa; // ((b22 - b11) + (b22 - b33))/3
1444  // c3 = b33 - aa; // ((b33 - b11) + (b33 - b22))/3
1445  {
1446  double b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1447  double b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1448  double b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1449  c1 = (b11_b22 - b33_b11)/3;
1450  c2 = (b22_b33 - b11_b22)/3;
1451  c3 = (b33_b11 - b22_b33)/3;
1452  }
1453  double Q, R;
1454  Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1455  R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1456  // R = (-1/2)*det(B-(tr(B)/3)*I)
1457  // Note: 54*(det(S))^2 <= |S|_F^6, when S^t=S and tr(S)=0, S is 3x3
1458  // Therefore: R^2 <= Q^3
1459 
1460  if (Q <= 0.) { ; }
1461 
1462  // else if (fabs(R) >= sqrtQ3)
1463  // {
1464  // double det = (d[0] * (d[4] * d[8] - d[5] * d[7]) +
1465  // d[3] * (d[2] * d[7] - d[1] * d[8]) +
1466  // d[6] * (d[1] * d[5] - d[2] * d[4]));
1467  //
1468  // if (R > 0.)
1469  // {
1470  // if (i == 2)
1471  // // aa -= 2*sqrtQ;
1472  // return fabs(det)/(aa + sqrtQ);
1473  // else
1474  // aa += sqrtQ;
1475  // }
1476  // else
1477  // {
1478  // if (i != 0)
1479  // aa -= sqrtQ;
1480  // // aa = fabs(det)/sqrt(aa + 2*sqrtQ);
1481  // else
1482  // aa += 2*sqrtQ;
1483  // }
1484  // }
1485 
1486  else
1487  {
1488  double sqrtQ = sqrt(Q);
1489  double sqrtQ3 = Q*sqrtQ;
1490  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1491  // double sqrtQ3 = pow(Q, 1.5);
1492  double r;
1493 
1494  if (fabs(R) >= sqrtQ3)
1495  {
1496  if (R < 0.)
1497  {
1498  // R = -1.;
1499  r = 2*sqrtQ;
1500  }
1501  else
1502  {
1503  // R = 1.;
1504  r = -2*sqrtQ;
1505  }
1506  }
1507  else
1508  {
1509  R = R/sqrtQ3;
1510 
1511  // if (fabs(R) <= 0.95)
1512  if (fabs(R) <= 0.9)
1513  {
1514  if (i == 2)
1515  {
1516  aa -= 2*sqrtQ*cos(acos(R)/3); // min
1517  }
1518  else if (i == 0)
1519  {
1520  aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1521  }
1522  else
1523  {
1524  aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3); // mid
1525  }
1526  goto have_aa;
1527  }
1528 
1529  if (R < 0.)
1530  {
1531  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1532  if (i == 0)
1533  {
1534  aa += r;
1535  goto have_aa;
1536  }
1537  }
1538  else
1539  {
1540  r = -2*sqrtQ*cos(acos(R)/3); // min
1541  if (i == 2)
1542  {
1543  aa += r;
1544  goto have_aa;
1545  }
1546  }
1547  }
1548 
1549  // (tr(B)/3 + r) is the root which is separated from the other
1550  // two roots which are close to each other when |R| is close to 1
1551 
1552  c1 -= r;
1553  c2 -= r;
1554  c3 -= r;
1555  // aa += r;
1556 
1557  // Type of Householder reflections: z --> mu ek, where k is the index
1558  // of the entry in z with:
1559  // mode == 0: smallest absolute value --> angle closest to pi/2
1560  // (eliminate large entries)
1561  // mode == 1: largest absolute value --> angle farthest from pi/2
1562  // (eliminate small entries)
1563  const int mode = 1;
1564 
1565  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1566  // | c1 b12 b13 |
1567  // | b12 c2 b23 | = B - aa*I
1568  // | b13 b23 c3 |
1569  // This vector is also an eigenvector for B corresponding to aa
1570  // The vector z overwrites (c1,c2,c3).
1571  switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1572  {
1573  case 3:
1574  aa += r;
1575  goto have_aa;
1576  case 2:
1577  // ok, continue with the returned vector orthogonal to the kernel
1578  case 1:
1579  // ok, continue with the returned vector in the "near"-kernel
1580  ;
1581  }
1582 
1583  // Using the eigenvector c = (c1,c2,c3) to transform B into
1584  // | b11 0 0 |
1585  // B <-- Q P B P Q = | 0 b22 b23 |
1586  // | 0 b23 b33 |
1587  double v1, v2, v3, g;
1588  internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1589  c1, c2, c3, v1, v2, v3, g);
1590  // Q = I - g v v^t
1591  // P - permutation matrix switching rows and columns 1 and k
1592 
1593  // find the eigenvalues of
1594  // | b22 b23 |
1595  // | b23 b33 |
1596  internal::Eigenvalues2S(b23, b22, b33);
1597 
1598  if (i == 2)
1599  {
1600  aa = fmin(fmin(b11, b22), b33);
1601  }
1602  else if (i == 1)
1603  {
1604  if (b11 <= b22)
1605  {
1606  aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1607  }
1608  else
1609  {
1610  aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1611  }
1612  }
1613  else
1614  {
1615  aa = fmax(fmax(b11, b22), b33);
1616  }
1617  }
1618 
1619 have_aa:
1620 
1621  return sqrt(fabs(aa))*mult; // take abs before we sort?
1622 }
1623 
1624 
1625 /// Assuming L.U = P.A for a factored matrix (m x m),
1626 // compute x <- A x
1627 //
1628 // @param [in] data LU factorization of A
1629 // @param [in] m square matrix height
1630 // @param [in] ipiv array storing pivot information
1631 // @param [in, out] x vector storing right-hand side and then solution
1632 MFEM_HOST_DEVICE
1633 inline void LUSolve(const double *data, const int m, const int *ipiv,
1634  double *x)
1635 {
1636  // X <- P X
1637  for (int i = 0; i < m; i++)
1638  {
1639  internal::Swap<double>(x[i], x[ipiv[i]]);
1640  }
1641 
1642  // X <- L^{-1} X
1643  for (int j = 0; j < m; j++)
1644  {
1645  const double x_j = x[j];
1646  for (int i = j + 1; i < m; i++)
1647  {
1648  x[i] -= data[i + j * m] * x_j;
1649  }
1650  }
1651 
1652  // X <- U^{-1} X
1653  for (int j = m - 1; j >= 0; j--)
1654  {
1655  const double x_j = (x[j] /= data[j + j * m]);
1656  for (int i = 0; i < j; i++)
1657  {
1658  x[i] -= data[i + j * m] * x_j;
1659  }
1660  }
1661 }
1662 
1663 } // namespace kernels
1664 
1665 } // namespace mfem
1666 
1667 #endif // MFEM_LINALG_KERNELS_HPP
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition: kernels.hpp:237
MFEM_HOST_DEVICE double DistanceSquared(const double *x, const double *y)
Compute the square of the Euclidean distance to another vector.
Definition: kernels.hpp:40
MFEM_HOST_DEVICE void FNorm(double &scale_factor, double &scaled_fnorm2, const T *data)
Definition: kernels.hpp:79
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
Definition: kernels.hpp:363
double epsilon
Definition: ex25.cpp:141
MFEM_HOST_DEVICE void Diag(const double c, double *data)
Creates n x n diagonal matrix with diagonal elements c.
Definition: kernels.hpp:49
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const double *d, double *left_inv)
Definition: kernels.hpp:1080
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata...
Definition: kernels.hpp:266
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const double *d, double *left_inv)
Definition: kernels.hpp:1072
MFEM_HOST_DEVICE void MultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *AtBdata)
Multiply the transpose of a matrix of size Aheight x Awidth and data Adata with a matrix of size Ahei...
Definition: kernels.hpp:390
Vector beta
MFEM_HOST_DEVICE double CalcSingularvalue< 3 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 3 with given data.
Definition: kernels.hpp:1392
std::function< double(const Vector &)> f(double mass_coeff)
Definition: lor_mms.hpp:30
MFEM_HOST_DEVICE void CalcEigenvalues(const double *data, double *lambda, double *vec)
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:1111
double b
Definition: lissajous.cpp:42
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const double *d, double *left_inv)
Definition: kernels.hpp:1089
MFEM_HOST_DEVICE void LUSolve(const double *data, const int m, const int *ipiv, double *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition: kernels.hpp:1633
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:133
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
Definition: kernels.hpp:256
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data...
Definition: kernels.hpp:163
void Swap(Array< T > &, Array< T > &)
Definition: array.hpp:638
MFEM_HOST_DEVICE void MultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix transpose vector multiplication: y = At x, where the matrix A is of size height x width with g...
Definition: kernels.hpp:196
MFEM_HOST_DEVICE double CalcSingularvalue(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE double CalcSingularvalue< 2 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 2 with given data.
Definition: kernels.hpp:1344
double a
Definition: lissajous.cpp:41
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:1142
double mu
Definition: ex25.cpp:140
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data...
Definition: kernels.hpp:246
int dim
Definition: ex24.cpp:53
MFEM_HOST_DEVICE void Subtract(const double a, const double *x, const double *y, double *z)
Vector subtraction operation: z = a * (x - y)
Definition: kernels.hpp:58
RefCoord t[3]
MFEM_HOST_DEVICE void Set(const int height, const int width, const double alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata...
Definition: kernels.hpp:326
const double alpha
Definition: ex15.cpp:369
RefCoord s[3]
MFEM_HOST_DEVICE void AddMultVWt(const double *v, const double *w, double *VWt)
Dense matrix operation: VWt += v w^t.
Definition: kernels.hpp:67
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -> (A+A^T)/2.
Definition: kernels.hpp:223
MFEM_HOST_DEVICE void CalcLeftInverse(const double *data, double *left_inv)
Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
MFEM_HOST_DEVICE double FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
Definition: kernels.hpp:123