From 17b05b61f4231f4e1517e0751e5ef98ef53e7d64 Mon Sep 17 00:00:00 2001 From: Mark Gates Date: Tue, 14 Nov 2023 21:09:32 -0700 Subject: [PATCH] cond: return value instead of output argument --- include/slate/simplified_api.hh | 23 +++++++------- include/slate/slate.hh | 40 +++++++++++++++---------- src/gecondest.cc | 49 +++++++++++++++--------------- src/pocondest.cc | 49 +++++++++++++++--------------- src/trcondest.cc | 53 ++++++++++++++++++++------------- test/test_gecondest.cc | 7 ++--- test/test_pocondest.cc | 6 ++-- test/test_trcondest.cc | 22 +++++++------- 8 files changed, 130 insertions(+), 119 deletions(-) diff --git a/include/slate/simplified_api.hh b/include/slate/simplified_api.hh index a56cf778d..f16c0b9c1 100644 --- a/include/slate/simplified_api.hh +++ b/include/slate/simplified_api.hh @@ -363,14 +363,13 @@ void lu_inverse_using_factor_out_of_place( // gecondest template -void lu_condest_using_factor( - Norm norm, +blas::real_type lu_rcondest_using_factor( + Norm in_norm, Matrix& A, blas::real_type Anorm, - blas::real_type *rcond, Options const& opts = Options()) { - gecondest(norm, A, Anorm, rcond, opts); + return gecondest( in_norm, A, Anorm, opts ); } //----------------------------------------- @@ -495,14 +494,13 @@ void chol_inverse_using_factor( // pocondest template -void chol_condest_using_factor( - Norm norm, +blas::real_type chol_rcondest_using_factor( + Norm in_norm, HermitianMatrix& A, blas::real_type Anorm, - blas::real_type *rcond, Options const& opts = Options()) { - pocondest(norm, A, Anorm, rcond, opts); + return pocondest( in_norm, A, Anorm, opts ); } //----------------------------------------- @@ -684,16 +682,15 @@ void lq_multiply_by_q( // trcondest template -void triangular_condest( - Norm norm, +blas::real_type triangular_rcondest( + Norm in_norm, TriangularMatrix& A, - blas::real_type *rcond, + blas::real_type Anorm, Options const& opts = Options()) { - trcondest(norm, A, rcond, opts); + return trcondest( in_norm, A, Anorm, opts ); } - //------------------------------------------------------------------------------ // Symmetric/Hermitian Eigenvalues diff --git a/include/slate/slate.hh b/include/slate/slate.hh index 9ba21f4d0..d0785b5bd 100644 --- a/include/slate/slate.hh +++ b/include/slate/slate.hh @@ -1345,15 +1345,14 @@ void steqr2( //----------------------------------------- // gecondest() template -void gecondest( - Norm in_norm, - Matrix& A, - blas::real_type Anorm, - blas::real_type *rcond, - Options const& opts = Options()); +blas::real_type gecondest( + Norm in_norm, + Matrix& A, + blas::real_type Anorm, + Options const& opts = Options()); template -[[deprecated( "Pass Anorm by value instead. Will be removed 2024-11." )]] +[[deprecated( "Use rcond = gecondest(...) and pass Anorm by value instead. Will be removed 2024-11." )]] void gecondest( Norm in_norm, Matrix& A, @@ -1361,27 +1360,38 @@ void gecondest( blas::real_type *rcond, Options const& opts = Options()) { - gecondest( in_norm, A, *Anorm, rcond, opts ); + *rcond = gecondest( in_norm, A, *Anorm, opts ); } //----------------------------------------- // pocondest() template -void pocondest( - Norm in_norm, - HermitianMatrix& A, - blas::real_type Anorm, - blas::real_type *rcond, - Options const& opts = Options()); +blas::real_type pocondest( + Norm in_norm, + HermitianMatrix& A, + blas::real_type Anorm, + Options const& opts = Options()); //----------------------------------------- // trcondest() template +blas::real_type trcondest( + Norm in_norm, + TriangularMatrix& A, + blas::real_type Anorm, + Options const& opts = Options()); + +template +[[deprecated( "Use rcond = trcondest(...) and pass Anorm. Will be removed 2024-11." )]] void trcondest( Norm in_norm, TriangularMatrix& A, blas::real_type *rcond, - Options const& opts = Options()); + Options const& opts = Options()) +{ + blas::real_type Anorm = norm( in_norm, A, opts ); + *rcond = trcondest( in_norm, A, Anorm, opts ); +} } // namespace slate diff --git a/src/gecondest.cc b/src/gecondest.cc index 22e5026b4..99929b32d 100644 --- a/src/gecondest.cc +++ b/src/gecondest.cc @@ -38,10 +38,6 @@ namespace slate { /// If Norm::One, the 1-norm of the original matrix A. /// If Norm::Inf, the infinity-norm of the original matrix A. /// -/// @param[in,out] rcond -/// The reciprocal of the condition number of the matrix A, -/// computed as stated above. -/// /// @param[in] opts /// Additional options, as map of name = value pairs. Possible options: /// - Option::Target: @@ -51,15 +47,19 @@ namespace slate { /// - HostBatch: batched BLAS on CPU host. /// - Devices: batched BLAS on GPU device. /// +/// @return rcond +/// The reciprocal of the condition number of the matrix A, +/// computed as stated above. +/// rcond is used instead of cond to handle overflow better. +/// /// @ingroup cond_specialization /// template -void gecondest( - Norm in_norm, - Matrix& A, - blas::real_type Anorm, - blas::real_type *rcond, - Options const& opts) +blas::real_type gecondest( + Norm in_norm, + Matrix& A, + blas::real_type Anorm, + Options const& opts) { using blas::real; using real_t = blas::real_type; @@ -74,17 +74,18 @@ void gecondest( else { slate_error("invalid norm."); } + if (Anorm < 0 || std::isnan( Anorm )) { + slate_error( "invalid Anorm" ); + } int64_t m = A.m(); // Quick return - *rcond = 0.; if (m <= 1) { - *rcond = 1.; - return; + return 1.; } else if (Anorm == 0.) { - return; + return 0.; } scalar_t alpha = 1.; @@ -118,8 +119,7 @@ void gecondest( MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() ); MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() ); - while (kase != 0) - { + while (kase != 0) { if (kase == kase1) { // Multiply by inv(L). slate::trsm( Side::Left, alpha, L, X, opts ); @@ -144,42 +144,41 @@ void gecondest( // Compute the estimate of the reciprocal condition number. if (Ainvnorm != 0.0) { - *rcond = (1.0 / Ainvnorm) / Anorm; + return (1.0 / Ainvnorm) / Anorm; + } + else { + return 0.; } } //------------------------------------------------------------------------------ // Explicit instantiations. template -void gecondest( +float gecondest( Norm in_norm, Matrix& A, float Anorm, - float *rcond, Options const& opts); template -void gecondest( +double gecondest( Norm in_norm, Matrix& A, double Anorm, - double *rcond, Options const& opts); template -void gecondest< std::complex >( +float gecondest< std::complex >( Norm in_norm, Matrix< std::complex >& A, float Anorm, - float *rcond, Options const& opts); template -void gecondest< std::complex >( +double gecondest< std::complex >( Norm in_norm, Matrix< std::complex >& A, double Anorm, - double *rcond, Options const& opts); } // namespace slate diff --git a/src/pocondest.cc b/src/pocondest.cc index cf2764818..891246ebd 100644 --- a/src/pocondest.cc +++ b/src/pocondest.cc @@ -39,10 +39,6 @@ namespace slate { /// If Norm::One, the 1-norm of the original matrix A. /// If Norm::Inf, the infinity-norm of the original matrix A. /// -/// @param[in,out] rcond -/// The reciprocal of the condition number of the matrix A, -/// computed as stated above. -/// /// @param[in] opts /// Additional options, as map of name = value pairs. Possible options: /// - Option::Target: @@ -52,15 +48,19 @@ namespace slate { /// - HostBatch: batched BLAS on CPU host. /// - Devices: batched BLAS on GPU device. /// +/// @return rcond +/// The reciprocal of the condition number of the matrix A, +/// computed as stated above. +/// rcond is used instead of cond to handle overflow better. +/// /// @ingroup cond_specialization /// template -void pocondest( - Norm in_norm, - HermitianMatrix& A, - blas::real_type Anorm, - blas::real_type *rcond, - Options const& opts) +blas::real_type pocondest( + Norm in_norm, + HermitianMatrix& A, + blas::real_type Anorm, + Options const& opts) { using blas::real; using real_t = blas::real_type; @@ -69,17 +69,18 @@ void pocondest( if (in_norm != Norm::One && in_norm != Norm::Inf) { slate_error("invalid norm."); } + if (Anorm < 0 || std::isnan( Anorm )) { + slate_error( "invalid Anorm" ); + } int64_t m = A.m(); // Quick return - *rcond = 0.; if (m <= 1) { - *rcond = 1.; - return; + return 1.; } else if (Anorm == 0.) { - return; + return 0.; } real_t Ainvnorm = 0.0; @@ -107,8 +108,7 @@ void pocondest( MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() ); MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() ); - while (kase != 0) - { + while (kase != 0) { // A is symmetric, so both cases are equivalent potrs( A, X, opts ); @@ -119,42 +119,41 @@ void pocondest( // Compute the estimate of the reciprocal condition number. if (Ainvnorm != 0.0) { - *rcond = (1.0 / Ainvnorm) / Anorm; + return (1.0 / Ainvnorm) / Anorm; + } + else { + return 0.; } } //------------------------------------------------------------------------------ // Explicit instantiations. template -void pocondest( +float pocondest( Norm in_norm, HermitianMatrix& A, float Anorm, - float *rcond, Options const& opts); template -void pocondest( +double pocondest( Norm in_norm, HermitianMatrix& A, double Anorm, - double *rcond, Options const& opts); template -void pocondest< std::complex >( +float pocondest< std::complex >( Norm in_norm, HermitianMatrix< std::complex >& A, float Anorm, - float *rcond, Options const& opts); template -void pocondest< std::complex >( +double pocondest< std::complex >( Norm in_norm, HermitianMatrix< std::complex >& A, double Anorm, - double *rcond, Options const& opts); } // namespace slate diff --git a/src/trcondest.cc b/src/trcondest.cc index 591144653..3bcc2922d 100644 --- a/src/trcondest.cc +++ b/src/trcondest.cc @@ -33,9 +33,9 @@ namespace slate { /// @param[in] A /// On entry, the n-by-n triangular matrix $A$. /// -/// @param[in,out] rcond -/// The reciprocal of the condition number of the matrix A, -/// computed as stated above. +/// @param[in] Anorm +/// If Norm::One, the 1-norm of the original matrix A. +/// If Norm::Inf, the infinity-norm of the original matrix A. /// /// @param[in] opts /// Additional options, as map of name = value pairs. Possible options: @@ -46,14 +46,19 @@ namespace slate { /// - HostBatch: batched BLAS on CPU host. /// - Devices: batched BLAS on GPU device. /// +/// @return rcond +/// The reciprocal of the condition number of the matrix A, +/// computed as stated above. +/// rcond is used instead of cond to handle overflow better. +/// /// @ingroup cond_specialization /// template -void trcondest( - Norm in_norm, - TriangularMatrix& A, - blas::real_type *rcond, - Options const& opts) +blas::real_type trcondest( + Norm in_norm, + TriangularMatrix& A, + blas::real_type Anorm, + Options const& opts) { using blas::real; using real_t = blas::real_type; @@ -68,14 +73,18 @@ void trcondest( else { slate_error("invalid norm."); } + if (Anorm < 0 || std::isnan( Anorm )) { + slate_error( "invalid Anorm" ); + } int64_t m = A.m(); // Quick return - *rcond = 0.; if (m <= 0) { - *rcond = 1.; - return; + return 1.; + } + else if (Anorm == 0.) { + return 0.; } scalar_t alpha = 1.; @@ -119,41 +128,43 @@ void trcondest( MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() ); } // while (kase != 0) - real_t Anorm = norm(in_norm, A, opts); // Compute the estimate of the reciprocal condition number. if (Ainvnorm != 0.0) { - *rcond = (1.0 / Ainvnorm) / Anorm; + return (1.0 / Ainvnorm) / Anorm; + } + else { + return 0.; } } //------------------------------------------------------------------------------ // Explicit instantiations. template -void trcondest( +float trcondest( Norm in_norm, TriangularMatrix& A, - float *rcond, + float Anorm, Options const& opts); template -void trcondest( +double trcondest( Norm in_norm, TriangularMatrix& A, - double *rcond, + double Anorm, Options const& opts); template -void trcondest< std::complex >( +float trcondest< std::complex >( Norm in_norm, TriangularMatrix< std::complex >& A, - float *rcond, + float Anorm, Options const& opts); template -void trcondest< std::complex >( +double trcondest< std::complex >( Norm in_norm, TriangularMatrix< std::complex >& A, - double *rcond, + double Anorm, Options const& opts); } // namespace slate diff --git a/test/test_gecondest.cc b/test/test_gecondest.cc index 7deff8801..3b7154d5a 100644 --- a/test/test_gecondest.cc +++ b/test/test_gecondest.cc @@ -176,12 +176,11 @@ void test_gecondest_work(Params& params, bool run) params.time2() = time2; params.gflops2() = gflop / time2; - - double time = barrier_get_wtime(MPI_COMM_WORLD); - slate::lu_condest_using_factor(norm, A, Anorm, &slate_rcond, opts); + slate_rcond = slate::lu_rcondest_using_factor( norm, A, Anorm, opts ); // Using traditional BLAS/LAPACK name - // slate::gecondest(norm, A, Anorm, &slate_rcond, opts); + // slate_rcond = slate::gecondest( norm, A, Anorm, opts ); + // slate::gecondest( norm, A, &Anorm, &slate_rcond, opts ); // deprecated time = barrier_get_wtime(MPI_COMM_WORLD) - time; // compute and save timing/performance params.time() = time; diff --git a/test/test_pocondest.cc b/test/test_pocondest.cc index 2913bcc49..97d1b9a0a 100644 --- a/test/test_pocondest.cc +++ b/test/test_pocondest.cc @@ -170,12 +170,10 @@ void test_pocondest_work(Params& params, bool run) params.time2() = time2; params.gflops2() = gflop / time2; - - double time = barrier_get_wtime(MPI_COMM_WORLD); - slate::chol_condest_using_factor(norm, A, Anorm, &slate_rcond, opts); + slate_rcond = slate::chol_rcondest_using_factor( norm, A, Anorm, opts ); // Using traditional BLAS/LAPACK name - // slate::pocondest(norm, A, Anorm, &slate_rcond, opts); + // slate_rcond = slate::pocondest( norm, A, Anorm, opts ); time = barrier_get_wtime(MPI_COMM_WORLD) - time; // compute and save timing/performance params.time() = time; diff --git a/test/test_trcondest.cc b/test/test_trcondest.cc index 3557a5556..2a1693ebe 100644 --- a/test/test_trcondest.cc +++ b/test/test_trcondest.cc @@ -131,8 +131,6 @@ void test_trcondest_work(Params& params, bool run) double gflop = lapack::Gflop::getrf(m, n); - // Compute the matrix norm - real_t Anorm = 0; real_t slate_rcond = 1., scl_rcond = 1., exact_rcond = 1.; if (! ref_only) { @@ -154,24 +152,24 @@ void test_trcondest_work(Params& params, bool run) double time = barrier_get_wtime(MPI_COMM_WORLD); auto R = slate::TriangularMatrix( slate::Uplo::Upper, slate::Diag::NonUnit, A ); - slate::triangular_condest(norm, R, &slate_rcond, opts); + + real_t Rnorm = slate::norm( norm, R, opts ); + slate_rcond = slate::triangular_rcondest( norm, R, Rnorm, opts ); // Using traditional BLAS/LAPACK name - // slate::trcondest(norm, R, &slate_rcond, opts); + // slate_rcond = slate::trcondest( norm, R, Rnorm, opts ); + // slate::trcondest( norm, R, &slate_rcond, opts ); // deprecated time = barrier_get_wtime(MPI_COMM_WORLD) - time; // compute and save timing/performance params.time() = time; params.gflops() = gflop / time; if (trace) slate::trace::Trace::finish(); - } - if (check) { - // Find the exact condition number: - auto R = slate::TriangularMatrix( - slate::Uplo::Upper, slate::Diag::NonUnit, A ); - Anorm = slate::norm(norm, R, opts); - trtri(R, opts); - exact_rcond = (1. / slate::norm(norm, R, opts)) / Anorm; + if (check) { + // Find the exact condition number: + trtri( R, opts ); + exact_rcond = (1. / slate::norm( norm, R, opts )) / Rnorm; + } } if (ref) {