1#ifndef TATOOINE_TENSOR_OPERATIONS_SOLVE_H
2#define TATOOINE_TENSOR_OPERATIONS_SOLVE_H
19template <
typename T, std::size_t... Dims>
27template <
typename T, std::size_t... Dims>
29 return tensor<T, Dims...>{x};
31template <fixed_size_quadratic_mat<2> MatA, static_mat MatB>
32requires(tensor_dimension<MatB, 0> == 2)
34 using out_value_type =
36 auto constexpr K = tensor_dimension<MatB, 1>;
39 using out_type = std::optional<out_mat_type>;
41 auto const div = (A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1));
43 return out_type{std::nullopt};
45 auto const p = 1 / div;
46 auto X = out_type{out_mat_type{}};
47 for (std::size_t i = 0; i < K; ++i) {
48 X->at(0, i) = -(A(0, 1) * B(1, i) - A(1, 1) * B(0, i)) * p;
49 X->at(1, i) = (A(0, 0) * B(1, i) - A(1, 0) * B(0, i)) * p;
54template <fixed_size_mat<2, 2> MatA, fixed_size_vec<2> VecB>
57 using out_value_type =
59 auto const div = (A(0, 0) * A(1, 1) - A(1, 0) * A(0, 1));
63 auto const p = 1 / div;
65 (A(0, 0) * b(1) - A(1, 0) * b(0)) * p};
68template <static_quadratic_mat MatA, static_vec VecB>
69requires(tensor_dimension<MatA, 0> ==
70 tensor_dimension<VecB, 0>)
73 tensor_dimension<MatA, 1>>> {
74 static constexpr auto N = tensor_dimension<MatA, 1>;
75 using out_value_type =
77 auto const det_inv = 1 /
det(A);
81 for (std::size_t i = 0; i < N; ++i) {
84 result(i) =
det(Y) * det_inv;
90#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
91template <static_quadratic_mat MatA, static_vec VecB>
92requires(tensor_dimension<MatA, 1> ==
93 tensor_dimension<VecB, 0>)
99 static constexpr auto M = tensor_dimension<MatA, 0>;
100 static constexpr auto N = tensor_dimension<MatA, 1>;
112#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
113template <static_quadratic_mat MatA, static_mat MatB>
114requires(tensor_dimension<MatA, 0> == tensor_dimension<MatB, 0>)
116 using out_value_type =
118 static constexpr auto N = tensor_dimension<MatA, 1>;
119 static constexpr auto K = tensor_dimension<MatB, 1>;
121 using out_type = std::optional<out_mat_type>;
123 auto B = out_type{B_};
127 return out_type{std::nullopt};
133#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
134template <static_mat MatA, static_mat MatB>
135requires(tensor_dimension<MatA, 0> == tensor_dimension<MatB, 0>)
137 using out_value_type =
139 static auto constexpr M = tensor_dimension<MatA, 0>;
140 static auto constexpr N = tensor_dimension<MatA, 1>;
141 static auto constexpr K = tensor_dimension<MatB, 1>;
143 using out_type = std::optional<mat_out_type>;
147 auto tau =
vec<out_value_type, (M < N ? M : N)>{};
151 return out_type{std::nullopt};
156 return out_type{std::nullopt};
161 return out_type{std::nullopt};
163 auto X = out_type{mat_out_type{}};
165 auto const... is)
mutable { X->at(is...) = B(is...); },
171#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
172template <static_mat MatA, static_vec VecB>
173requires (tensor_dimension<MatA, 0> == tensor_dimension<VecB, 0>)
175 using out_value_type =
177 static auto constexpr M = tensor_dimension<MatA, 0>;
178 static auto constexpr N = tensor_dimension<MatA, 1>;
181 using out_vec_type =
vec<out_value_type, (M < N ? M : N)>;
182 using out_type = std::optional<out_vec_type>;
183 auto tau = out_type{out_vec_type{}};
187 return out_type{std::nullopt};
191 return out_type{std::nullopt};
196 return out_type{std::nullopt};
198 for (std::size_t i = 0; i < tau->dimension(0); ++i) {
205template <static_mat MatA, static_mat MatB>
206requires (tensor_dimension<MatA, 0> == tensor_dimension<MatB, 0>)
208 static auto constexpr M = tensor_dimension<MatA, 0>;
209 static auto constexpr N = tensor_dimension<MatA, 1>;
210 static auto constexpr K = tensor_dimension<MatB, 1>;
211 if constexpr (M == 2 && N == 2 && K >= M) {
212 return solve_direct(std::forward<MatA>(A), std::forward<MatB>(B));
213 }
else if constexpr (M == N) {
215#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
216 throw std::runtime_error{
217 "cannot do a LU-factorization because LAPACK is missing"};
221 }
else if constexpr (M > N) {
222#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
225 throw std::runtime_error{
226 "cannot do a QR-factorization because LAPACK is missing"};
230 throw std::runtime_error{
"System is under-determined."};
234#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
235template <dynamic_tensor TensorA, dynamic_tensor TensorB>
238 assert(A_.rank() == 2);
239 assert(A_.dimension(0) == A_.dimension(1));
240 assert(A_.dimension(0) == B_.dimension(0));
241 assert(B_.rank() == 1 || B_.rank() == 2);
249 if (
auto const info =
lapack::gesv(A, B, ipiv); info != 0) {
252 return std::forward<decltype(B)>(B);
254 using out_value_type =
266#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
267template <dynamic_tensor TensorA, dynamic_tensor TensorB>
271 using out_value_type =
273 assert(A_.rank() == 2);
274 assert(B_.rank() == 1 || B_.rank() == 2);
275 assert(A_.dimension(0) == B_.dimension(0));
278 auto const M = A.dimension(0);
279 auto const N = A.dimension(1);
280 auto const K = (B.rank() == 1 ? 1 : B.dimension(1));
299 [&](
auto const i,
auto const j) {
311template <static_mat MatA, static_vec VecB>
312requires (tensor_dimension<MatA, 0> == tensor_dimension<VecB, 0>)
314 static auto constexpr M = tensor_dimension<MatA, 0>;
315 static auto constexpr N = tensor_dimension<MatA, 1>;
316 if constexpr (M == 2 && N == 2) {
317 return solve_direct(std::forward<MatA>(A), std::forward<VecB>(b));
318 }
else if constexpr (M == 3 && N == 3) {
319 return solve_cramer(std::forward<MatA>(A), std::forward<VecB>(b));
320 }
else if constexpr (M == N) {
321#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
324 throw std::runtime_error{
325 "cannot do a LU-factorization because LAPACK is missing"};
327 }
else if constexpr (M > N) {
328#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
331 throw std::runtime_error{
332 "cannot do a QR-factorization because LAPACK is missing"};
335 throw std::runtime_error{
"System is under-determined."};
339#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
348template <dynamic_tensor TensorA, dynamic_tensor TensorB>
354 assert(A_.rank() == 2);
355 assert(A_.dimension(0) == A_.dimension(1));
358 assert(B_.rank() == 1 || B_.rank() == 2);
360 assert(B_.dimension(0) == A_.dimension(0));
369 <<
"[lapack::sysv]\n D(" << info <<
", " << info
370 <<
") is exactly zero. The factorization has been completed, but the "
371 "block diagonal matrix D is exactly singular, so the solution "
373 "not be computed.\n";
374 }
else if (info < 0) {
375 std::cerr <<
"[lapack::sysv]\n Parameter " << -info <<
" is wrong.\n";
380 return std::forward<decltype(B)>(B);
390 <<
"[lapack::sysv]\n D(" << info <<
", " << info
391 <<
") is exactly zero. The factorization has been completed, but the "
392 "block diagonal matrix D is exactly singular, so the solution "
394 "not be computed.\n";
395 }
else if (info < 0) {
396 std::cerr <<
"[lapack::sysv]\n Parameter " << -info <<
" is wrong.\n";
406#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
472#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
548#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
551 std::forward<
decltype(B)>(B));
555template <dynamic_tensor TensorA, dynamic_tensor TensorB>
558 assert(A.rank() == 2);
559 assert(B.rank() == 1 || B.rank() == 2);
560 assert(B.dimension(0) == A.dimension(0));
561 auto const M = A.dimension(0);
562 auto const N = A.dimension(1);
564#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
566 std::forward<
decltype(B)>(B));
568 throw std::runtime_error{
569 "cannot do a LU-factorization because LAPACK is missing"};
572#if TATOOINE_BLAS_AND_LAPACK_AVAILABLE
574 std::forward<
decltype(B)>(B));
576 throw std::runtime_error{
577 "cannot do a QR-factorization because LAPACK is missing"};
580 throw std::runtime_error{
"System is under-determined."};
Definition: tensor_concepts.h:17
Definition: concepts.h:15
auto gesv(int N, int NRHS, Float *A, int LDA, int *IPIV, Float *B, int LDB) -> int
Definition: gesv.h:38
auto sysv(uplo u, int N, int NRHS, Float *A, int LDA, int *IPIV, Float *B, int LDB, Float *WORK, int LWORK) -> int
Definition: sysv.h:39
auto geqrf(int M, int N, Float *A, int LDA, Float *TAU, Float *WORK, int LWORK) -> int
Definition: geqrf.h:18
auto trtrs(uplo u, op t, diag d, int N, int NRHS, Float *A, int LDA, Float *B, int LDB) -> int
Definition: trtrs.h:18
auto ormqr(side SIDE, op TRANS, int M, int N, int K, Float *A, int LDA, Float *TAU, Float *C, int LDC, Float *WORK, int LWORK) -> int
Definition: ormqr.h:21
Definition: algorithm.h:6
auto solve_symmetric(dynamic_tensor auto &&A, dynamic_tensor auto &&B)
Definition: solve.h:549
typename common_type_impl< Ts... >::type common_type
Definition: common_type.h:23
auto copy_or_keep_if_rvalue_tensor_solve(dynamic_tensor auto &&x) -> decltype(auto)
Definition: solve.h:12
auto constexpr solve_direct(MatA &&A, MatB &&B)
Definition: solve.h:33
typename value_type_impl< T >::type value_type
Definition: type_traits.h:280
auto solve_symmetric_lapack(TensorA &&A_, TensorB &&B_, lapack::uplo uplo=lapack::uplo::lower) -> std::optional< tensor< common_type< tatooine::value_type< decltype(A_)>, tatooine::value_type< decltype(B_)> > > >
Definition: solve.h:349
auto solve(polynomial< Real, 1 > const &p) -> std::vector< Real >
solve a + b*x
Definition: polynomial.h:187
auto solve_lu_lapack(MatA &A_, VecB &&b_) -> std::optional< tensor< common_type< tatooine::value_type< MatA >, tatooine::value_type< VecB > > > >
Definition: solve.h:94
auto solve_qr_lapack(MatA &&A_, MatB &&B_)
Definition: solve.h:136
constexpr auto min(A &&a, B &&b)
Definition: math.h:15
constexpr auto for_loop(Iteration &&iteration, execution_policy::sequential_t, Ranges(&&... ranges)[2]) -> void
Use this function for creating a sequential nested loop.
Definition: for_loop.h:336
constexpr auto det(base_tensor< Tensor, T, 2, 2 > const &A) -> T
Definition: determinant.h:7
auto solve_cramer(MatA const &A, VecB const &b) -> std::optional< vec< common_type< tatooine::value_type< MatA >, tatooine::value_type< VecB > >, tensor_dimension< MatA, 1 > > >
Definition: solve.h:71
static constexpr auto zeros()
Definition: tensor.h:144