From 17b05b61f4231f4e1517e0751e5ef98ef53e7d64 Mon Sep 17 00:00:00 2001
From: Mark Gates <mgates3@icl.utk.edu>
Date: Tue, 14 Nov 2023 21:09:32 -0700
Subject: [PATCH] cond: return value instead of output argument

---
 include/slate/simplified_api.hh | 23 +++++++-------
 include/slate/slate.hh          | 40 +++++++++++++++----------
 src/gecondest.cc                | 49 +++++++++++++++---------------
 src/pocondest.cc                | 49 +++++++++++++++---------------
 src/trcondest.cc                | 53 ++++++++++++++++++++-------------
 test/test_gecondest.cc          |  7 ++---
 test/test_pocondest.cc          |  6 ++--
 test/test_trcondest.cc          | 22 +++++++-------
 8 files changed, 130 insertions(+), 119 deletions(-)
diff --git a/include/slate/simplified_api.hh b/include/slate/simplified_api.hh
index a56cf778d..f16c0b9c1 100644
--- a/include/slate/simplified_api.hh
+++ b/include/slate/simplified_api.hh
@@ -363,14 +363,13 @@ void lu_inverse_using_factor_out_of_place(
 
 // gecondest
 template <typename scalar_t>
-void lu_condest_using_factor(
-    Norm norm,
+blas::real_type<scalar_t> lu_rcondest_using_factor(
+    Norm in_norm,
     Matrix<scalar_t>& A,
     blas::real_type<scalar_t> Anorm,
-    blas::real_type<scalar_t> *rcond,
     Options const& opts = Options())
 {
-    gecondest(norm, A, Anorm, rcond, opts);
+    return gecondest( in_norm, A, Anorm, opts );
 }
 
 //-----------------------------------------
@@ -495,14 +494,13 @@ void chol_inverse_using_factor(
 
 // pocondest
 template <typename scalar_t>
-void chol_condest_using_factor(
-    Norm norm,
+blas::real_type<scalar_t> chol_rcondest_using_factor(
+    Norm in_norm,
     HermitianMatrix<scalar_t>& A,
     blas::real_type<scalar_t> Anorm,
-    blas::real_type<scalar_t> *rcond,
     Options const& opts = Options())
 {
-    pocondest(norm, A, Anorm, rcond, opts);
+    return pocondest( in_norm, A, Anorm, opts );
 }
 
 //-----------------------------------------
@@ -684,16 +682,15 @@ void lq_multiply_by_q(
 
 // trcondest
 template <typename scalar_t>
-void triangular_condest(
-    Norm norm,
+blas::real_type<scalar_t> triangular_rcondest(
+    Norm in_norm,
     TriangularMatrix<scalar_t>& A,
-    blas::real_type<scalar_t> *rcond,
+    blas::real_type<scalar_t> Anorm,
     Options const& opts = Options())
 {
-    trcondest(norm, A, rcond, opts);
+    return trcondest( in_norm, A, Anorm, opts );
 }
 
-
 //------------------------------------------------------------------------------
 // Symmetric/Hermitian Eigenvalues
 
diff --git a/include/slate/slate.hh b/include/slate/slate.hh
index 9ba21f4d0..d0785b5bd 100644
--- a/include/slate/slate.hh
+++ b/include/slate/slate.hh
@@ -1345,15 +1345,14 @@ void steqr2(
 //-----------------------------------------
 // gecondest()
 template <typename scalar_t>
-void gecondest(
-        Norm in_norm,
-        Matrix<scalar_t>& A,
-        blas::real_type<scalar_t> Anorm,
-        blas::real_type<scalar_t> *rcond,
-        Options const& opts = Options());
+blas::real_type<scalar_t> gecondest(
+    Norm in_norm,
+    Matrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts = Options());
 
 template <typename scalar_t>
-[[deprecated( "Pass Anorm by value instead. Will be removed 2024-11." )]]
+[[deprecated( "Use rcond = gecondest(...) and pass Anorm by value instead. Will be removed 2024-11." )]]
 void gecondest(
         Norm in_norm,
         Matrix<scalar_t>& A,
@@ -1361,27 +1360,38 @@ void gecondest(
         blas::real_type<scalar_t> *rcond,
         Options const& opts = Options())
 {
-    gecondest( in_norm, A, *Anorm, rcond, opts );
+    *rcond = gecondest( in_norm, A, *Anorm, opts );
 }
 
 //-----------------------------------------
 // pocondest()
 template <typename scalar_t>
-void pocondest(
-        Norm in_norm,
-        HermitianMatrix<scalar_t>& A,
-        blas::real_type<scalar_t> Anorm,
-        blas::real_type<scalar_t> *rcond,
-        Options const& opts = Options());
+blas::real_type<scalar_t> pocondest(
+    Norm in_norm,
+    HermitianMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts = Options());
 
 //-----------------------------------------
 // trcondest()
 template <typename scalar_t>
+blas::real_type<scalar_t> trcondest(
+    Norm in_norm,
+    TriangularMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts = Options());
+
+template <typename scalar_t>
+[[deprecated( "Use rcond = trcondest(...) and pass Anorm. Will be removed 2024-11." )]]
 void trcondest(
         Norm in_norm,
         TriangularMatrix<scalar_t>& A,
         blas::real_type<scalar_t> *rcond,
-        Options const& opts = Options());
+        Options const& opts = Options())
+{
+    blas::real_type<scalar_t> Anorm = norm( in_norm, A, opts );
+    *rcond = trcondest( in_norm, A, Anorm, opts );
+}
 
 } // namespace slate
 
diff --git a/src/gecondest.cc b/src/gecondest.cc
index 22e5026b4..99929b32d 100644
--- a/src/gecondest.cc
+++ b/src/gecondest.cc
@@ -38,10 +38,6 @@ namespace slate {
 ///     If Norm::One, the 1-norm of the original matrix A.
 ///     If Norm::Inf, the infinity-norm of the original matrix A.
 ///
-/// @param[in,out] rcond
-///     The reciprocal of the condition number of the matrix A,
-///     computed as stated above.
-///
 /// @param[in] opts
 ///     Additional options, as map of name = value pairs. Possible options:
 ///     - Option::Target:
@@ -51,15 +47,19 @@ namespace slate {
 ///       - HostBatch: batched BLAS on CPU host.
 ///       - Devices:   batched BLAS on GPU device.
 ///
+/// @return rcond
+///     The reciprocal of the condition number of the matrix A,
+///     computed as stated above.
+///     rcond is used instead of cond to handle overflow better.
+///
 /// @ingroup cond_specialization
 ///
 template <typename scalar_t>
-void gecondest(
-           Norm in_norm,
-           Matrix<scalar_t>& A,
-           blas::real_type<scalar_t> Anorm,
-           blas::real_type<scalar_t> *rcond,
-           Options const& opts)
+blas::real_type<scalar_t> gecondest(
+    Norm in_norm,
+    Matrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts)
 {
     using blas::real;
     using real_t = blas::real_type<scalar_t>;
@@ -74,17 +74,18 @@ void gecondest(
     else {
         slate_error("invalid norm.");
     }
+    if (Anorm < 0 || std::isnan( Anorm )) {
+        slate_error( "invalid Anorm" );
+    }
 
     int64_t m = A.m();
 
     // Quick return
-    *rcond = 0.;
     if (m <= 1) {
-        *rcond = 1.;
-        return;
+        return 1.;
     }
     else if (Anorm == 0.) {
-        return;
+        return 0.;
     }
 
     scalar_t alpha = 1.;
@@ -118,8 +119,7 @@ void gecondest(
     MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
     MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
 
-    while (kase != 0)
-    {
+    while (kase != 0) {
         if (kase == kase1) {
             // Multiply by inv(L).
             slate::trsm( Side::Left, alpha, L, X, opts );
@@ -144,42 +144,41 @@ void gecondest(
 
     // Compute the estimate of the reciprocal condition number.
     if (Ainvnorm != 0.0) {
-        *rcond = (1.0 / Ainvnorm) / Anorm;
+        return (1.0 / Ainvnorm) / Anorm;
+    }
+    else {
+        return 0.;
     }
 }
 
 //------------------------------------------------------------------------------
 // Explicit instantiations.
 template
-void gecondest<float>(
+float gecondest<float>(
     Norm in_norm,
     Matrix<float>& A,
     float Anorm,
-    float *rcond,
     Options const& opts);
 
 template
-void gecondest<double>(
+double gecondest<double>(
     Norm in_norm,
     Matrix<double>& A,
     double Anorm,
-    double *rcond,
     Options const& opts);
 
 template
-void gecondest< std::complex<float> >(
+float gecondest< std::complex<float> >(
     Norm in_norm,
     Matrix< std::complex<float> >& A,
     float Anorm,
-    float *rcond,
     Options const& opts);
 
 template
-void gecondest< std::complex<double> >(
+double gecondest< std::complex<double> >(
     Norm in_norm,
     Matrix< std::complex<double> >& A,
     double Anorm,
-    double *rcond,
     Options const& opts);
 
 } // namespace slate
diff --git a/src/pocondest.cc b/src/pocondest.cc
index cf2764818..891246ebd 100644
--- a/src/pocondest.cc
+++ b/src/pocondest.cc
@@ -39,10 +39,6 @@ namespace slate {
 ///     If Norm::One, the 1-norm of the original matrix A.
 ///     If Norm::Inf, the infinity-norm of the original matrix A.
 ///
-/// @param[in,out] rcond
-///     The reciprocal of the condition number of the matrix A,
-///     computed as stated above.
-///
 /// @param[in] opts
 ///     Additional options, as map of name = value pairs. Possible options:
 ///     - Option::Target:
@@ -52,15 +48,19 @@ namespace slate {
 ///       - HostBatch: batched BLAS on CPU host.
 ///       - Devices:   batched BLAS on GPU device.
 ///
+/// @return rcond
+///     The reciprocal of the condition number of the matrix A,
+///     computed as stated above.
+///     rcond is used instead of cond to handle overflow better.
+///
 /// @ingroup cond_specialization
 ///
 template <typename scalar_t>
-void pocondest(
-           Norm in_norm,
-           HermitianMatrix<scalar_t>& A,
-           blas::real_type<scalar_t> Anorm,
-           blas::real_type<scalar_t> *rcond,
-           Options const& opts)
+blas::real_type<scalar_t> pocondest(
+    Norm in_norm,
+    HermitianMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts)
 {
     using blas::real;
     using real_t = blas::real_type<scalar_t>;
@@ -69,17 +69,18 @@ void pocondest(
     if (in_norm != Norm::One && in_norm != Norm::Inf) {
         slate_error("invalid norm.");
     }
+    if (Anorm < 0 || std::isnan( Anorm )) {
+        slate_error( "invalid Anorm" );
+    }
 
     int64_t m = A.m();
 
     // Quick return
-    *rcond = 0.;
     if (m <= 1) {
-        *rcond = 1.;
-        return;
+        return 1.;
     }
     else if (Anorm == 0.) {
-        return;
+        return 0.;
     }
 
     real_t Ainvnorm = 0.0;
@@ -107,8 +108,7 @@ void pocondest(
     MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
     MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
 
-    while (kase != 0)
-    {
+    while (kase != 0) {
         // A is symmetric, so both cases are equivalent
         potrs( A, X, opts );
 
@@ -119,42 +119,41 @@ void pocondest(
 
     // Compute the estimate of the reciprocal condition number.
     if (Ainvnorm != 0.0) {
-        *rcond = (1.0 / Ainvnorm) / Anorm;
+        return (1.0 / Ainvnorm) / Anorm;
+    }
+    else {
+        return 0.;
     }
 }
 
 //------------------------------------------------------------------------------
 // Explicit instantiations.
 template
-void pocondest<float>(
+float pocondest<float>(
     Norm in_norm,
     HermitianMatrix<float>& A,
     float Anorm,
-    float *rcond,
     Options const& opts);
 
 template
-void pocondest<double>(
+double pocondest<double>(
     Norm in_norm,
     HermitianMatrix<double>& A,
     double Anorm,
-    double *rcond,
     Options const& opts);
 
 template
-void pocondest< std::complex<float> >(
+float pocondest< std::complex<float> >(
     Norm in_norm,
     HermitianMatrix< std::complex<float> >& A,
     float Anorm,
-    float *rcond,
     Options const& opts);
 
 template
-void pocondest< std::complex<double> >(
+double pocondest< std::complex<double> >(
     Norm in_norm,
     HermitianMatrix< std::complex<double> >& A,
     double Anorm,
-    double *rcond,
     Options const& opts);
 
 } // namespace slate
diff --git a/src/trcondest.cc b/src/trcondest.cc
index 591144653..3bcc2922d 100644
--- a/src/trcondest.cc
+++ b/src/trcondest.cc
@@ -33,9 +33,9 @@ namespace slate {
 /// @param[in] A
 ///     On entry, the n-by-n triangular matrix $A$.
 ///
-/// @param[in,out] rcond
-///     The reciprocal of the condition number of the matrix A,
-///     computed as stated above.
+/// @param[in] Anorm
+///     If Norm::One, the 1-norm of the original matrix A.
+///     If Norm::Inf, the infinity-norm of the original matrix A.
 ///
 /// @param[in] opts
 ///     Additional options, as map of name = value pairs. Possible options:
@@ -46,14 +46,19 @@ namespace slate {
 ///       - HostBatch: batched BLAS on CPU host.
 ///       - Devices:   batched BLAS on GPU device.
 ///
+/// @return rcond
+///     The reciprocal of the condition number of the matrix A,
+///     computed as stated above.
+///     rcond is used instead of cond to handle overflow better.
+///
 /// @ingroup cond_specialization
 ///
 template <typename scalar_t>
-void trcondest(
-           Norm in_norm,
-           TriangularMatrix<scalar_t>& A,
-           blas::real_type<scalar_t> *rcond,
-           Options const& opts)
+blas::real_type<scalar_t> trcondest(
+    Norm in_norm,
+    TriangularMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    Options const& opts)
 {
     using blas::real;
     using real_t = blas::real_type<scalar_t>;
@@ -68,14 +73,18 @@ void trcondest(
     else {
         slate_error("invalid norm.");
     }
+    if (Anorm < 0 || std::isnan( Anorm )) {
+        slate_error( "invalid Anorm" );
+    }
 
     int64_t m = A.m();
 
     // Quick return
-    *rcond = 0.;
     if (m <= 0) {
-        *rcond = 1.;
-        return;
+        return 1.;
+    }
+    else if (Anorm == 0.) {
+        return 0.;
     }
 
     scalar_t alpha = 1.;
@@ -119,41 +128,43 @@ void trcondest(
         MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
     } // while (kase != 0)
 
-    real_t Anorm = norm(in_norm, A, opts);
     // Compute the estimate of the reciprocal condition number.
     if (Ainvnorm != 0.0) {
-        *rcond = (1.0 / Ainvnorm) / Anorm;
+        return (1.0 / Ainvnorm) / Anorm;
+    }
+    else {
+        return 0.;
     }
 }
 
 //------------------------------------------------------------------------------
 // Explicit instantiations.
 template
-void trcondest<float>(
+float trcondest<float>(
     Norm in_norm,
     TriangularMatrix<float>& A,
-    float *rcond,
+    float Anorm,
     Options const& opts);
 
 template
-void trcondest<double>(
+double trcondest<double>(
     Norm in_norm,
     TriangularMatrix<double>& A,
-    double *rcond,
+    double Anorm,
     Options const& opts);
 
 template
-void trcondest< std::complex<float> >(
+float trcondest< std::complex<float> >(
     Norm in_norm,
     TriangularMatrix< std::complex<float> >& A,
-    float *rcond,
+    float Anorm,
     Options const& opts);
 
 template
-void trcondest< std::complex<double> >(
+double trcondest< std::complex<double> >(
     Norm in_norm,
     TriangularMatrix< std::complex<double> >& A,
-    double *rcond,
+    double Anorm,
     Options const& opts);
 
 } // namespace slate
diff --git a/test/test_gecondest.cc b/test/test_gecondest.cc
index 7deff8801..3b7154d5a 100644
--- a/test/test_gecondest.cc
+++ b/test/test_gecondest.cc
@@ -176,12 +176,11 @@ void test_gecondest_work(Params& params, bool run)
         params.time2() = time2;
         params.gflops2() = gflop / time2;
 
-
-
         double time = barrier_get_wtime(MPI_COMM_WORLD);
-        slate::lu_condest_using_factor(norm, A, Anorm, &slate_rcond, opts);
+        slate_rcond = slate::lu_rcondest_using_factor( norm, A, Anorm, opts );
         // Using traditional BLAS/LAPACK name
-        // slate::gecondest(norm, A, Anorm, &slate_rcond, opts);
+        // slate_rcond = slate::gecondest( norm, A, Anorm, opts );
+        // slate::gecondest( norm, A, &Anorm, &slate_rcond, opts );  // deprecated
         time = barrier_get_wtime(MPI_COMM_WORLD) - time;
         // compute and save timing/performance
         params.time() = time;
diff --git a/test/test_pocondest.cc b/test/test_pocondest.cc
index 2913bcc49..97d1b9a0a 100644
--- a/test/test_pocondest.cc
+++ b/test/test_pocondest.cc
@@ -170,12 +170,10 @@ void test_pocondest_work(Params& params, bool run)
         params.time2() = time2;
         params.gflops2() = gflop / time2;
 
-
-
         double time = barrier_get_wtime(MPI_COMM_WORLD);
-        slate::chol_condest_using_factor(norm, A, Anorm, &slate_rcond, opts);
+        slate_rcond = slate::chol_rcondest_using_factor( norm, A, Anorm, opts );
         // Using traditional BLAS/LAPACK name
-        // slate::pocondest(norm, A, Anorm, &slate_rcond, opts);
+        // slate_rcond = slate::pocondest( norm, A, Anorm, opts );
         time = barrier_get_wtime(MPI_COMM_WORLD) - time;
         // compute and save timing/performance
         params.time() = time;
diff --git a/test/test_trcondest.cc b/test/test_trcondest.cc
index 3557a5556..2a1693ebe 100644
--- a/test/test_trcondest.cc
+++ b/test/test_trcondest.cc
@@ -131,8 +131,6 @@ void test_trcondest_work(Params& params, bool run)
 
     double gflop = lapack::Gflop<scalar_t>::getrf(m, n);
 
-    // Compute the matrix norm
-    real_t Anorm = 0;
     real_t slate_rcond = 1., scl_rcond = 1., exact_rcond = 1.;
 
     if (! ref_only) {
@@ -154,24 +152,24 @@ void test_trcondest_work(Params& params, bool run)
         double time = barrier_get_wtime(MPI_COMM_WORLD);
         auto R  = slate::TriangularMatrix<scalar_t>(
             slate::Uplo::Upper, slate::Diag::NonUnit, A );
-        slate::triangular_condest(norm, R, &slate_rcond, opts);
+
+        real_t Rnorm = slate::norm( norm, R, opts );
+        slate_rcond = slate::triangular_rcondest( norm, R, Rnorm, opts );
         // Using traditional BLAS/LAPACK name
-        // slate::trcondest(norm, R, &slate_rcond, opts);
+        // slate_rcond = slate::trcondest( norm, R, Rnorm, opts );
+        // slate::trcondest( norm, R, &slate_rcond, opts );  // deprecated
         time = barrier_get_wtime(MPI_COMM_WORLD) - time;
         // compute and save timing/performance
         params.time() = time;
         params.gflops() = gflop / time;
 
         if (trace) slate::trace::Trace::finish();
-    }
 
-    if (check) {
-        // Find the exact condition number:
-        auto R  = slate::TriangularMatrix<scalar_t>(
-            slate::Uplo::Upper, slate::Diag::NonUnit, A );
-        Anorm = slate::norm(norm, R, opts);
-        trtri(R, opts);
-        exact_rcond = (1. / slate::norm(norm, R, opts)) / Anorm;
+        if (check) {
+            // Find the exact condition number:
+            trtri( R, opts );
+            exact_rcond = (1. / slate::norm( norm, R, opts )) / Rnorm;
+        }
     }
 
     if (ref) {