|
| 1 | +#ifndef AMREX_LU_SOLVER_H_ |
| 2 | +#define AMREX_LU_SOLVER_H_ |
| 3 | +#include <AMReX_Config.H> |
| 4 | + |
| 5 | +#include <AMReX_Arena.H> |
| 6 | +#include <AMReX_Array.H> |
| 7 | +#include <cmath> |
| 8 | +#include <limits> |
| 9 | + |
| 10 | +namespace amrex { |
| 11 | + |
| 12 | +// https://en.wikipedia.org/wiki/LU_decomposition |
| 13 | + |
| 14 | +template <int N, typename T> |
| 15 | +class LUSolver |
| 16 | +{ |
| 17 | +public: |
| 18 | + |
| 19 | + LUSolver () = default; |
| 20 | + |
| 21 | + LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat); |
| 22 | + |
| 23 | + void define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat); |
| 24 | + |
| 25 | + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE |
| 26 | + void operator() (T* AMREX_RESTRICT x, T const* AMREX_RESTRICT b) const |
| 27 | + { |
| 28 | + for (int i = 0; i < N; ++i) { |
| 29 | + x[i] = b[m_piv(i)]; |
| 30 | + for (int k = 0; k < i; ++k) { |
| 31 | + x[i] -= m_mat(i,k) * x[k]; |
| 32 | + } |
| 33 | + } |
| 34 | + |
| 35 | + for (int i = N-1; i >= 0; --i) { |
| 36 | + for (int k = i+1; k < N; ++k) { |
| 37 | + x[i] -= m_mat(i,k) * x[k]; |
| 38 | + } |
| 39 | + x[i] *= m_mat(i,i); |
| 40 | + } |
| 41 | + } |
| 42 | + |
| 43 | + [[nodiscard]] AMREX_GPU_HOST_DEVICE |
| 44 | + Array2D<T,0,N-1,0,N-1,Order::C> invert () const |
| 45 | + { |
| 46 | + Array2D<T,0,N-1,0,N-1,Order::C> IA; |
| 47 | + for (int j = 0; j < N; ++j) { |
| 48 | + for (int i = 0; i < N; ++i) { |
| 49 | + IA(i,j) = (m_piv(i) == j) ? T(1.0) : T(0.0); |
| 50 | + for (int k = 0; k < i; ++k) { |
| 51 | + IA(i,j) -= m_mat(i,k) * IA(k,j); |
| 52 | + } |
| 53 | + } |
| 54 | + for (int i = N-1; i >= 0; --i) { |
| 55 | + for (int k = i+1; k < N; ++k) { |
| 56 | + IA(i,j) -= m_mat(i,k) * IA(k,j); |
| 57 | + } |
| 58 | + IA(i,j) *= m_mat(i,i); |
| 59 | + } |
| 60 | + } |
| 61 | + return IA; |
| 62 | + } |
| 63 | + |
| 64 | + [[nodiscard]] AMREX_GPU_HOST_DEVICE |
| 65 | + T determinant () const |
| 66 | + { |
| 67 | + T det = m_mat(0,0); |
| 68 | + for (int i = 1; i < N; ++i) { |
| 69 | + det *= m_mat(i,i); |
| 70 | + } |
| 71 | + det = T(1.0) / det; |
| 72 | + return (m_npivs % 2 == 0) ? det : -det; |
| 73 | + } |
| 74 | + |
| 75 | +private: |
| 76 | + |
| 77 | + void define_innard (); |
| 78 | + |
| 79 | + Array2D<T, 0, N-1, 0, N-1, Order::C> m_mat; |
| 80 | + Array1D<int, 0, N-1> m_piv; |
| 81 | + int m_npivs = 0; |
| 82 | +}; |
| 83 | + |
| 84 | +template <int N, typename T> |
| 85 | +LUSolver<N,T>::LUSolver (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat) |
| 86 | + : m_mat(a_mat) |
| 87 | +{ |
| 88 | + define_innard(); |
| 89 | +} |
| 90 | + |
| 91 | +template <int N, typename T> |
| 92 | +void LUSolver<N,T>::define (Array2D<T, 0, N-1, 0, N-1, Order::C> const& a_mat) |
| 93 | +{ |
| 94 | + m_mat = a_mat; |
| 95 | + define_innard(); |
| 96 | +} |
| 97 | + |
| 98 | +template <int N, typename T> |
| 99 | +void LUSolver<N,T>::define_innard () |
| 100 | +{ |
| 101 | + static_assert(N > 1); |
| 102 | + static_assert(std::is_floating_point_v<T>); |
| 103 | + |
| 104 | + for (int i = 0; i < N; ++i) { m_piv(i) = i; } |
| 105 | + m_npivs = 0; |
| 106 | + |
| 107 | + for (int i = 0; i < N; ++i) { |
| 108 | + T maxA = 0; |
| 109 | + int imax = i; |
| 110 | + |
| 111 | + for (int k = i; k < N; ++k) { |
| 112 | + auto const absA = std::abs(m_mat(k,i)); |
| 113 | + if (absA > maxA) { |
| 114 | + maxA = absA; |
| 115 | + imax = k; |
| 116 | + } |
| 117 | + } |
| 118 | + |
| 119 | + if (maxA < std::numeric_limits<T>::min()) { |
| 120 | + amrex::Abort("LUSolver: matrix is degenerate"); |
| 121 | + } |
| 122 | + |
| 123 | + if (imax != i) { |
| 124 | + std::swap(m_piv(i), m_piv(imax)); |
| 125 | + for (int j = 0; j < N; ++j) { |
| 126 | + std::swap(m_mat(i,j), m_mat(imax,j)); |
| 127 | + } |
| 128 | + ++m_npivs; |
| 129 | + } |
| 130 | + |
| 131 | + for (int j = i+1; j < N; ++j) { |
| 132 | + m_mat(j,i) /= m_mat(i,i); |
| 133 | + for (int k = i+1; k < N; ++k) { |
| 134 | + m_mat(j,k) -= m_mat(j,i) * m_mat(i,k); |
| 135 | + } |
| 136 | + } |
| 137 | + } |
| 138 | + |
| 139 | + for (int i = 0; i < N; ++i) { |
| 140 | + m_mat(i,i) = T(1) / m_mat(i,i); |
| 141 | + } |
| 142 | +} |
| 143 | + |
| 144 | +} |
| 145 | + |
| 146 | +#endif |
0 commit comments