diff --git a/GNUmakefile b/GNUmakefile
index 97c97adcc..aae7fa9e1 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -624,6 +624,7 @@ ifneq ($(only_unit),1)
         src/pbsv.cc \
         src/pbtrf.cc \
         src/pbtrs.cc \
+        src/pocondest.cc \
         src/posv.cc \
         src/posv_mixed.cc \
         src/posv_mixed_gmres.cc \
@@ -731,6 +732,7 @@ tester_src += \
         test/test_herk.cc \
         test/test_hesv.cc \
         test/test_pbsv.cc \
+        test/test_pocondest.cc \
         test/test_posv.cc \
         test/test_potri.cc \
         test/test_scale.cc \
diff --git a/include/slate/BaseMatrix.hh b/include/slate/BaseMatrix.hh
index 37661d91e..8b1e3892a 100644
--- a/include/slate/BaseMatrix.hh
+++ b/include/slate/BaseMatrix.hh
@@ -2535,6 +2535,9 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
     // TODO Consider inter-Queue dependencies instead of disabling async
     async &= !(dst_device != HostNum && src_device != HostNum);
 
+    if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
+        storage_->tileMakeTransposable( dst_tile );
+    }
 
     if (is_square || ! need_convert) {
         lapack::Queue* queue = comm_queue( work_device );
@@ -2546,9 +2549,6 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
         }
     }
     else {
-        if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
-            storage_->tileMakeTransposable( dst_tile );
-        }
         dst_tile->setLayout( target_layout );
 
         scalar_t* work_data = nullptr;
diff --git a/include/slate/simplified_api.hh b/include/slate/simplified_api.hh
index 1fa0e3c5b..a56cf778d 100644
--- a/include/slate/simplified_api.hh
+++ b/include/slate/simplified_api.hh
@@ -358,6 +358,21 @@ void lu_inverse_using_factor_out_of_place(
     getri(A, pivots, A_inverse, opts);
 }
 
+//-----------------------------------------
+// lu_condest_using_factor()
+
+// gecondest
+template <typename scalar_t>
+void lu_condest_using_factor(
+    Norm norm,
+    Matrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    blas::real_type<scalar_t> *rcond,
+    Options const& opts = Options())
+{
+    gecondest(norm, A, Anorm, rcond, opts);
+}
+
 //-----------------------------------------
 // Cholesky
 
@@ -475,6 +490,21 @@ void chol_inverse_using_factor(
     potri(A, opts);
 }
 
+//-----------------------------------------
+// chol_condest_using_factor()
+
+// pocondest
+template <typename scalar_t>
+void chol_condest_using_factor(
+    Norm norm,
+    HermitianMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> Anorm,
+    blas::real_type<scalar_t> *rcond,
+    Options const& opts = Options())
+{
+    pocondest(norm, A, Anorm, rcond, opts);
+}
+
 //-----------------------------------------
 // Symmetric indefinite -- block Aasen's
 
@@ -649,6 +679,21 @@ void lq_multiply_by_q(
     unmlq(side, op, A, T, C, opts);
 }
 
+//-----------------------------------------
+// triangular_condest()
+
+// trcondest
+template <typename scalar_t>
+void triangular_condest(
+    Norm norm,
+    TriangularMatrix<scalar_t>& A,
+    blas::real_type<scalar_t> *rcond,
+    Options const& opts = Options())
+{
+    trcondest(norm, A, rcond, opts);
+}
+
+
 //------------------------------------------------------------------------------
 // Symmetric/Hermitian Eigenvalues
 
diff --git a/include/slate/slate.hh b/include/slate/slate.hh
index 0269b8938..9ba21f4d0 100644
--- a/include/slate/slate.hh
+++ b/include/slate/slate.hh
@@ -1345,11 +1345,33 @@ void steqr2(
 //-----------------------------------------
 // gecondest()
 template <typename scalar_t>
+void gecondest(
+        Norm in_norm,
+        Matrix<scalar_t>& A,
+        blas::real_type<scalar_t> Anorm,
+        blas::real_type<scalar_t> *rcond,
+        Options const& opts = Options());
+
+template <typename scalar_t>
+[[deprecated( "Pass Anorm by value instead. Will be removed 2024-11." )]]
 void gecondest(
         Norm in_norm,
         Matrix<scalar_t>& A,
         blas::real_type<scalar_t> *Anorm,
         blas::real_type<scalar_t> *rcond,
+        Options const& opts = Options())
+{
+    gecondest( in_norm, A, *Anorm, rcond, opts );
+}
+
+//-----------------------------------------
+// pocondest()
+template <typename scalar_t>
+void pocondest(
+        Norm in_norm,
+        HermitianMatrix<scalar_t>& A,
+        blas::real_type<scalar_t> Anorm,
+        blas::real_type<scalar_t> *rcond,
         Options const& opts = Options());
 
 //-----------------------------------------
diff --git a/src/gecondest.cc b/src/gecondest.cc
index 26b65355c..22e5026b4 100644
--- a/src/gecondest.cc
+++ b/src/gecondest.cc
@@ -57,7 +57,7 @@ template <typename scalar_t>
 void gecondest(
            Norm in_norm,
            Matrix<scalar_t>& A,
-           blas::real_type<scalar_t> *Anorm,
+           blas::real_type<scalar_t> Anorm,
            blas::real_type<scalar_t> *rcond,
            Options const& opts)
 {
@@ -83,7 +83,7 @@ void gecondest(
         *rcond = 1.;
         return;
     }
-    else if (*Anorm == 0.) {
+    else if (Anorm == 0.) {
         return;
     }
 
@@ -144,7 +144,7 @@ void gecondest(
 
     // Compute the estimate of the reciprocal condition number.
     if (Ainvnorm != 0.0) {
-        *rcond = (1.0 / Ainvnorm) / *Anorm;
+        *rcond = (1.0 / Ainvnorm) / Anorm;
     }
 }
 
@@ -154,7 +154,7 @@ template
 void gecondest<float>(
     Norm in_norm,
     Matrix<float>& A,
-    float *Anorm,
+    float Anorm,
     float *rcond,
     Options const& opts);
 
@@ -162,7 +162,7 @@ template
 void gecondest<double>(
     Norm in_norm,
     Matrix<double>& A,
-    double *Anorm,
+    double Anorm,
     double *rcond,
     Options const& opts);
 
@@ -170,7 +170,7 @@ template
 void gecondest< std::complex<float> >(
     Norm in_norm,
     Matrix< std::complex<float> >& A,
-    float *Anorm,
+    float Anorm,
     float *rcond,
     Options const& opts);
 
@@ -178,7 +178,7 @@ template
 void gecondest< std::complex<double> >(
     Norm in_norm,
     Matrix< std::complex<double> >& A,
-    double *Anorm,
+    double Anorm,
     double *rcond,
     Options const& opts);
 
diff --git a/src/pocondest.cc b/src/pocondest.cc
new file mode 100644
index 000000000..cf2764818
--- /dev/null
+++ b/src/pocondest.cc
@@ -0,0 +1,160 @@
+// Copyright (c) 2017-2022, University of Tennessee. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
+
+#include "slate/slate.hh"
+#include "auxiliary/Debug.hh"
+#include "slate/Matrix.hh"
+#include "slate/HermitianMatrix.hh"
+#include "internal/internal.hh"
+
+namespace slate {
+
+//------------------------------------------------------------------------------
+/// Distributed parallel estimate of the reciprocal of the condition number
+/// of a general matrix A, in either the 1-norm or the infinity-norm.
+/// Generic implementation for any target.
+///
+/// The reciprocal of the condition number computed as:
+/// \[
+///     rcond = \frac{1}{\|\|A\|\| \times \|\|A^{-1}\|\-}
+/// \]
+/// where $A$ is the output of the Cholesky factorization (potrf).
+///
+//------------------------------------------------------------------------------
+/// @tparam scalar_t
+///     One of float, double, std::complex<float>, std::complex<double>.
+//------------------------------------------------------------------------------
+/// @param[in] in_norm
+///     rcond to compute:
+///     - Norm::One: 1-norm condition number
+///     - Norm::Inf: infinity-norm condition number
+///
+/// @param[in] A
+///     On entry, the n-by-n matrix $A$.
+///     It is the output of the Cholesky factorization of a Hermitian matrix.
+///
+/// @param[in] Anorm
+///     If Norm::One, the 1-norm of the original matrix A.
+///     If Norm::Inf, the infinity-norm of the original matrix A.
+///
+/// @param[in,out] rcond
+///     The reciprocal of the condition number of the matrix A,
+///     computed as stated above.
+///
+/// @param[in] opts
+///     Additional options, as map of name = value pairs. Possible options:
+///     - Option::Target:
+///       Implementation to target. Possible values:
+///       - HostTask:  OpenMP tasks on CPU host [default].
+///       - HostNest:  nested OpenMP parallel for loop on CPU host.
+///       - HostBatch: batched BLAS on CPU host.
+///       - Devices:   batched BLAS on GPU device.
+///
+/// @ingroup cond_specialization
+///
+template <typename scalar_t>
+void pocondest(
+           Norm in_norm,
+           HermitianMatrix<scalar_t>& A,
+           blas::real_type<scalar_t> Anorm,
+           blas::real_type<scalar_t> *rcond,
+           Options const& opts)
+{
+    using blas::real;
+    using real_t = blas::real_type<scalar_t>;
+
+    int kase;
+    if (in_norm != Norm::One && in_norm != Norm::Inf) {
+        slate_error("invalid norm.");
+    }
+
+    int64_t m = A.m();
+
+    // Quick return
+    *rcond = 0.;
+    if (m <= 1) {
+        *rcond = 1.;
+        return;
+    }
+    else if (Anorm == 0.) {
+        return;
+    }
+
+    real_t Ainvnorm = 0.0;
+
+    std::vector<int64_t> isave = {0, 0, 0, 0};
+
+    auto tileMb = A.tileMbFunc();
+    auto tileNb = func::uniform_blocksize(1, 1);
+    auto tileRank = A.tileRankFunc();
+    auto tileDevice = A.tileDeviceFunc();
+    slate::Matrix<scalar_t> X (m, 1, tileMb, tileNb,
+                               tileRank, tileDevice, A.mpiComm());
+    X.insertLocalTiles(Target::Host);
+    slate::Matrix<scalar_t> V (m, 1, tileMb, tileNb,
+                               tileRank, tileDevice, A.mpiComm());
+    V.insertLocalTiles(Target::Host);
+    slate::Matrix<int64_t> isgn (m, 1, tileMb, tileNb,
+                                 tileRank, tileDevice, A.mpiComm());
+    isgn.insertLocalTiles(Target::Host);
+
+    // initial and final value of kase is 0
+    kase = 0;
+    internal::norm1est( X, V, isgn, &Ainvnorm, &kase, isave, opts);
+
+    MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
+    MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
+
+    while (kase != 0)
+    {
+        // A is symmetric, so both cases are equivalent
+        potrs( A, X, opts );
+
+        internal::norm1est( X, V, isgn, &Ainvnorm, &kase, isave, opts);
+        MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
+        MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
+    } // while (kase != 0)
+
+    // Compute the estimate of the reciprocal condition number.
+    if (Ainvnorm != 0.0) {
+        *rcond = (1.0 / Ainvnorm) / Anorm;
+    }
+}
+
+//------------------------------------------------------------------------------
+// Explicit instantiations.
+template
+void pocondest<float>(
+    Norm in_norm,
+    HermitianMatrix<float>& A,
+    float Anorm,
+    float *rcond,
+    Options const& opts);
+
+template
+void pocondest<double>(
+    Norm in_norm,
+    HermitianMatrix<double>& A,
+    double Anorm,
+    double *rcond,
+    Options const& opts);
+
+template
+void pocondest< std::complex<float> >(
+    Norm in_norm,
+    HermitianMatrix< std::complex<float> >& A,
+    float Anorm,
+    float *rcond,
+    Options const& opts);
+
+template
+void pocondest< std::complex<double> >(
+    Norm in_norm,
+    HermitianMatrix< std::complex<double> >& A,
+    double Anorm,
+    double *rcond,
+    Options const& opts);
+
+} // namespace slate
diff --git a/test/run_tests.py b/test/run_tests.py
index 4e9983c0e..0dd06e3e1 100755
--- a/test/run_tests.py
+++ b/test/run_tests.py
@@ -614,12 +614,12 @@ def filter_csv( values, csv ):
 if (opts.cond):
     cmds += [
     [ 'gecondest', gen + dtype + n ],
+    [ 'pocondest', gen + dtype + n + uplo ],
 
     # Triangle
     [ 'trcondest', gen + dtype + n ],
 
     #[ 'gbcon', gen + dtype + la + n  + kl + ku ],
-    #[ 'pocon', gen + dtype + la + n + uplo ],
     #[ 'pbcon', gen + dtype + la + n + kd + uplo ],
     ]
 
diff --git a/test/scalapack_wrappers.hh b/test/scalapack_wrappers.hh
index 90341f09d..c62dc465b 100644
--- a/test/scalapack_wrappers.hh
+++ b/test/scalapack_wrappers.hh
@@ -4624,6 +4624,126 @@ void scalapack_pgecon(
         Anorm, rcond, work, &lwork_, iwork, &liwork_, &info_ );
 }
 
+//==============================================================================
+// Fortran prototypes
+#define scalapack_pspocon BLAS_FORTRAN_NAME( pspocon, PSPOCON )
+#define scalapack_pdpocon BLAS_FORTRAN_NAME( pdpocon, PDPOCON )
+#define scalapack_pcpocon BLAS_FORTRAN_NAME( pcpocon, PCPOCON )
+#define scalapack_pzpocon BLAS_FORTRAN_NAME( pzpocon, PZPOCON )
+
+extern "C" {
+
+void scalapack_pspocon(
+    const char* uplo, const char* norm, blas_int* n,
+    float* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    float* Anorm, float* rcond,
+    float* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info );
+
+void scalapack_pdpocon(
+    const char* uplo, const char* norm, blas_int* n,
+    double* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    double* Anorm, double* rcond,
+    double* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info );
+
+void scalapack_pcpocon(
+    const char* uplo, const char* norm, blas_int* n,
+    std::complex<float>* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    float* Anorm, float* rcond,
+    std::complex<float>* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info );
+
+void scalapack_pzpocon(
+    const char* uplo, const char* norm, blas_int* n,
+    std::complex<double>* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    double* Anorm, double* rcond,
+    std::complex<double>* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info );
+
+} // extern C
+
+//------------------------------------------------------------------------------
+// Low-level overloaded wrappers
+inline void scalapack_ppocon(
+    const char* uplo, const char* norm, blas_int* n,
+    float* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    float* Anorm, float* rcond,
+    float* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info )
+{
+    scalapack_pspocon(
+        uplo, norm, n, A, ia, ja, descA,
+        Anorm, rcond, work, lwork, iwrok, liwork, info );
+}
+
+inline void scalapack_ppocon(
+    const char* uplo, const char* norm, blas_int* n,
+    double* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    double* Anorm, double* rcond,
+    double* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info )
+{
+    scalapack_pdpocon(
+        uplo, norm, n, A, ia, ja, descA,
+        Anorm, rcond, work, lwork, iwrok, liwork, info );
+}
+
+inline void scalapack_ppocon(
+    const char* uplo, const char* norm, blas_int* n,
+    std::complex<float>* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    float* Anorm, float* rcond,
+    std::complex<float>* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info )
+{
+    scalapack_pcpocon(
+        uplo, norm, n, A, ia, ja, descA,
+        Anorm, rcond, work, lwork, iwrok, liwork, info );
+}
+
+inline void scalapack_ppocon(
+    const char* uplo, const char* norm, blas_int* n,
+    std::complex<double>* A, blas_int* ia, blas_int* ja, blas_int* descA,
+    double* Anorm, double* rcond,
+    std::complex<double>* work, blas_int* lwork,
+    blas_int* iwrok, blas_int* liwork,
+    blas_int* info )
+{
+    scalapack_pzpocon(
+        uplo, norm, n, A, ia, ja, descA,
+        Anorm, rcond, work, lwork, iwrok, liwork, info );
+}
+
+//------------------------------------------------------------------------------
+// Templated wrapper
+template <typename scalar_t>
+void scalapack_ppocon(
+    const char* uplo, const char* norm, int64_t n,
+    scalar_t* A, int64_t ia, int64_t ja, blas_int* descA,
+    blas::real_type<scalar_t>* Anorm, blas::real_type<scalar_t>* rcond,
+    scalar_t* work, int64_t lwork,
+    blas_int* iwork, int64_t liwork,
+    int64_t* info )
+{
+    blas_int n_      = to_blas_int( n );
+    blas_int ia_     = to_blas_int( ia );
+    blas_int ja_     = to_blas_int( ja );
+    blas_int lwork_  = to_blas_int( lwork );
+    blas_int liwork_ = to_blas_int( liwork );
+    blas_int info_   = 0;
+    scalapack_ppocon(
+        uplo, norm, &n_, A, &ia_, &ja_, descA,
+        Anorm, rcond, work, &lwork_, iwork, &liwork_, &info_ );
+}
+
+
 //==============================================================================
 // Fortran prototypes
 #define scalapack_pstrcon BLAS_FORTRAN_NAME( pstrcon, PSTRCON )
diff --git a/test/test.cc b/test/test.cc
index 81742fbd5..9146f8f9c 100644
--- a/test/test.cc
+++ b/test/test.cc
@@ -157,6 +157,7 @@ std::vector< testsweeper::routines_t > routines = {
 
     { "potri",              test_potri,        Section::posv },
     { "",                   nullptr,           Section::newline },
+    { "pocondest",          test_pocondest,    Section::posv },
 
     // -----
     // symmetric indefinite
diff --git a/test/test.hh b/test/test.hh
index afb1ac0d8..7aa3760d3 100644
--- a/test/test.hh
+++ b/test/test.hh
@@ -218,8 +218,9 @@ void test_trtri      (Params& params, bool run);
 void test_gbsv   (Params& params, bool run);
 
 // Cholesky
-void test_posv   (Params& params, bool run);
-void test_potri  (Params& params, bool run);
+void test_posv      (Params& params, bool run);
+void test_pocondest (Params& params, bool run);
+void test_potri     (Params& params, bool run);
 
 // Cholesky, band
 void test_pbsv   (Params& params, bool run);
diff --git a/test/test_gecondest.cc b/test/test_gecondest.cc
index 0ff2f8775..7deff8801 100644
--- a/test/test_gecondest.cc
+++ b/test/test_gecondest.cc
@@ -179,8 +179,9 @@ void test_gecondest_work(Params& params, bool run)
 
 
         double time = barrier_get_wtime(MPI_COMM_WORLD);
-
-        slate::gecondest(norm, A, &Anorm, &slate_rcond, opts);
+        slate::lu_condest_using_factor(norm, A, Anorm, &slate_rcond, opts);
+        // Using traditional BLAS/LAPACK name
+        // slate::gecondest(norm, A, Anorm, &slate_rcond, opts);
         time = barrier_get_wtime(MPI_COMM_WORLD) - time;
         // compute and save timing/performance
         params.time() = time;
diff --git a/test/test_pocondest.cc b/test/test_pocondest.cc
new file mode 100644
index 000000000..2913bcc49
--- /dev/null
+++ b/test/test_pocondest.cc
@@ -0,0 +1,309 @@
+// Copyright (c) 2017-2022, University of Tennessee. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
+
+#include "slate/slate.hh"
+#include "test.hh"
+#include "blas/flops.hh"
+#include "lapack/flops.hh"
+#include "print_matrix.hh"
+#include "grid_utils.hh"
+
+#include "scalapack_wrappers.hh"
+#include "scalapack_support_routines.hh"
+#include "scalapack_copy.hh"
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <utility>
+
+//------------------------------------------------------------------------------
+template <typename scalar_t>
+void test_pocondest_work(Params& params, bool run)
+{
+    using real_t = blas::real_type<scalar_t>;
+    using blas::real;
+
+    // Constants
+    const scalar_t zero = 0;
+    const scalar_t one = 1;
+    real_t rone = 1.;
+
+    auto method = params.method_lu();
+
+    // get & mark input values
+    slate::Norm norm = params.norm();
+    slate::Uplo uplo = params.uplo();
+    int64_t n = params.dim.n();
+    int64_t p = params.grid.m();
+    int64_t q = params.grid.n();
+    int64_t nb = params.nb();
+    int64_t ib = params.ib();
+    int64_t lookahead = params.lookahead();
+    int64_t panel_threads = params.panel_threads();
+    bool ref_only = params.ref() == 'o';
+    bool ref = params.ref() == 'y' || ref_only;
+    bool check = params.check() == 'y' && ! ref_only;
+    bool trace = params.trace() == 'y';
+    int verbose = params.verbose();
+    SLATE_UNUSED(verbose);
+    slate::Origin origin = params.origin();
+    slate::Target target = params.target();
+    slate::GridOrder grid_order = params.grid_order();
+    params.matrix.mark();
+    params.matrixB.mark();
+
+    // mark non-standard output values
+    params.time();
+    params.gflops();
+    params.ref_time();
+    params.ref_gflops();
+    params.time2();
+    params.time2.name( "chol time (s)" );
+    params.time2.width( 12 );
+    params.gflops2();
+    params.gflops2.name( "chol gflop/s" );
+
+    params.error();
+    params.error.name( "slate-exact" );
+    params.error2();
+    params.error2.name( "scl-exact" );
+    params.error3();
+    params.error3.name( "slate-scl" );
+
+    params.value();
+    params.value.name( "slate" );
+    params.value2();
+    params.value2.name( "exact" );
+    params.value3();
+    params.value3.name( "scl" );
+
+    if (! run) {
+        params.matrix.kind.set_default( "rand_dominant" );
+        params.tol() = 0.75;
+        return;
+    }
+
+    // MPI variables
+    int mpi_rank, myrow, mycol;
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    gridinfo( mpi_rank, grid_order, p, q, &myrow, &mycol );
+
+    slate::Options const opts =  {
+        {slate::Option::Lookahead, lookahead},
+        {slate::Option::Target, target},
+        {slate::Option::MaxPanelThreads, panel_threads},
+        {slate::Option::InnerBlocking, ib},
+        {slate::Option::MethodLU, method},
+    };
+
+    // Matrix A: figure out local size.
+    int64_t mlocA = num_local_rows_cols(n, nb, myrow, p);
+    int64_t nlocA = num_local_rows_cols(n, nb, mycol, q);
+    int64_t lldA  = blas::max(1, mlocA); // local leading dimension of A
+
+    std::vector<scalar_t> A_data;
+    slate::HermitianMatrix<scalar_t> A;
+    if (origin != slate::Origin::ScaLAPACK) {
+        // SLATE allocates CPU or GPU tiles.
+        slate::Target origin_target = origin2target(origin);
+        A = slate::HermitianMatrix<scalar_t>(
+                uplo, n, nb, p, q, MPI_COMM_WORLD );
+        A.insertLocalTiles(origin_target);
+    }
+    else {
+        // Create SLATE matrix from the ScaLAPACK layouts
+        A_data.resize( lldA * nlocA );
+        A = slate::HermitianMatrix<scalar_t>::fromScaLAPACK(
+            uplo, n, &A_data[0], lldA, nb, grid_order, p, q, MPI_COMM_WORLD );
+    }
+
+    slate::Matrix<scalar_t> Id;
+    if (check) {
+        slate::Target origin_target = origin2target(origin);
+        Id = slate::Matrix<scalar_t>(
+                n, n,    nb, p, q, MPI_COMM_WORLD );
+        Id.insertLocalTiles(origin_target);
+    }
+
+    slate::generate_matrix(params.matrix,  A);
+    print_matrix("A", A, params);
+
+    // If ref is required, copy test data.
+    slate::HermitianMatrix<scalar_t> Aref;
+    std::vector<scalar_t> Aref_data;
+    if (ref) {
+        Aref_data.resize( lldA* nlocA );
+        Aref = slate::HermitianMatrix<scalar_t>::fromScaLAPACK(
+                uplo, n, &Aref_data[0], lldA, nb,
+                grid_order, p, q, MPI_COMM_WORLD );
+
+        slate::copy(A, Aref);
+    }
+
+    double gflop = lapack::Gflop<scalar_t>::getrf(n, n);
+
+    // Compute the matrix norm
+    real_t Anorm = 0;
+    Anorm = slate::norm(norm, A, opts);
+    real_t slate_rcond = 0., scl_rcond = 0., exact_rcond = 0.;
+
+    if (! ref_only) {
+
+        if (trace) slate::trace::Trace::on();
+        else slate::trace::Trace::off();
+
+        //==================================================
+        // Run SLATE test: potrf followed by pocondest
+        // potrf: Factor A = LL^T.
+        // pocondest:  Solve AX = B, including factoring A.
+        //==================================================
+
+        double time2 = barrier_get_wtime(MPI_COMM_WORLD);
+        slate::chol_factor(A, opts);
+        // Using traditional BLAS/LAPACK name
+        // slate::potrf(A, opts);
+        // compute and save timing/performance
+        time2 = barrier_get_wtime(MPI_COMM_WORLD) - time2;
+        params.time2() = time2;
+        params.gflops2() = gflop / time2;
+
+
+
+        double time = barrier_get_wtime(MPI_COMM_WORLD);
+        slate::chol_condest_using_factor(norm, A, Anorm, &slate_rcond, opts);
+        // Using traditional BLAS/LAPACK name
+        // slate::pocondest(norm, A, Anorm, &slate_rcond, opts);
+        time = barrier_get_wtime(MPI_COMM_WORLD) - time;
+        // compute and save timing/performance
+        params.time() = time;
+        params.gflops() = gflop / time;
+
+        if (trace) slate::trace::Trace::finish();
+    }
+
+    if (check) {
+        // Find the exact condition number:
+        // A * A^-1  = Id
+        // LU * A^-1 = Id
+        // U * A^-1 = trsm(L, Id), Id will be overwritten by the first solve
+        // A^-1     = trsm(U, Id), Id will be overwritten by the second solve
+        // Id is the inverse of the matrix A, norm(A^-1) = norm(Id)
+
+        // Set the Id matrix to identity
+        set(zero, one, Id);
+        chol_solve_using_factor( A, Id, opts );
+        exact_rcond = (1. / slate::norm(norm, Id, opts)) / Anorm;
+    }
+
+    if (ref) {
+        #ifdef SLATE_HAVE_SCALAPACK
+            // A comparison with a reference routine from ScaLAPACK for timing only
+
+            // BLACS/MPI variables
+            blas_int ictxt, myrow_, mycol_, p_, q_;
+            blas_int mpi_rank_ = 0, nprocs = 1;
+            // initialize BLACS and ScaLAPACK
+            Cblacs_pinfo(&mpi_rank_, &nprocs);
+            slate_assert(p*q <= nprocs);
+            Cblacs_get(-1, 0, &ictxt);
+            Cblacs_gridinit( &ictxt, grid_order2str( grid_order ), p, q );
+            Cblacs_gridinfo(ictxt, &p_, &q_, &myrow_, &mycol_);
+            slate_assert( p == p_ );
+            slate_assert( q == q_ );
+            slate_assert( myrow == myrow_ );
+            slate_assert( mycol == mycol_ );
+
+            // ScaLAPACK descriptor for the reference matrix
+            int64_t info;
+            blas_int Aref_desc[9];
+            scalapack_descinit(Aref_desc, n, n, nb, nb, 0, 0, ictxt, mlocA, &info);
+            slate_assert(info == 0);
+
+            //==================================================
+            // Run ScaLAPACK reference routine.
+            //==================================================
+            scalapack_ppotrf(
+                uplo2str(uplo), n, &Aref_data[0], 1, 1, Aref_desc, &info);
+            slate_assert( info == 0 );
+
+            // query for workspace size for ppocon
+            int64_t lwork = -1;
+            int64_t liwork = -1;
+            scalar_t dummy;
+            blas_int idummy;
+            scalapack_ppocon( uplo2str(uplo), norm2str(norm), n,
+                              &Aref_data[0], 1, 1, Aref_desc,
+                              &Anorm, &scl_rcond,
+                              &dummy, lwork, &idummy, liwork, &info );
+            slate_assert( info == 0 );
+            lwork = (int64_t)( real( dummy ) );
+            liwork = (int64_t)( real( idummy ) );
+
+            // Compute the condition number using scalapack
+            std::vector<scalar_t> work(lwork);
+            std::vector<blas_int> iwork(liwork);
+
+            // todo: ScaLAPCK pzpocon has a seg fault
+
+            double time = barrier_get_wtime(MPI_COMM_WORLD);
+            scalapack_ppocon( uplo2str(uplo), norm2str(norm), n,
+                              &Aref_data[0], 1, 1, Aref_desc,
+                              &Anorm, &scl_rcond,
+                              &work[0], lwork, &iwork[0], liwork, &info );
+            slate_assert( info == 0 );
+            time = barrier_get_wtime(MPI_COMM_WORLD) - time;
+
+            params.ref_time() = time;
+            params.ref_gflops() = gflop / time;
+
+            Cblacs_gridexit(ictxt);
+
+        #else  // not SLATE_HAVE_SCALAPACK
+            if (mpi_rank == 0)
+                printf( "ScaLAPACK not available\n" );
+        #endif
+    }
+
+    // Compute the error
+    params.error()  = std::abs( rone/slate_rcond - rone/exact_rcond ) / (rone/exact_rcond);
+    params.error2() = std::abs( rone/scl_rcond - rone/exact_rcond ) / (rone/exact_rcond);
+    params.error3() = std::abs(slate_rcond - scl_rcond);
+
+    // Printf out the condest
+    params.value()  = slate_rcond;
+    params.value2() = exact_rcond;
+    params.value3() = scl_rcond;
+
+    real_t tol = params.tol();
+    params.okay() = (params.error() <= tol);
+
+}
+
+// -----------------------------------------------------------------------------
+void test_pocondest(Params& params, bool run)
+{
+    switch (params.datatype()) {
+        case testsweeper::DataType::Single:
+            test_pocondest_work<float> (params, run);
+            break;
+
+        case testsweeper::DataType::Double:
+            test_pocondest_work<double> (params, run);
+            break;
+
+        case testsweeper::DataType::SingleComplex:
+            test_pocondest_work<std::complex<float>> (params, run);
+            break;
+
+        case testsweeper::DataType::DoubleComplex:
+            test_pocondest_work<std::complex<double>> (params, run);
+            break;
+
+        default:
+            throw std::runtime_error( "unknown datatype" );
+            break;
+    }
+}
diff --git a/test/test_trcondest.cc b/test/test_trcondest.cc
index 6e6e2df0c..3557a5556 100644
--- a/test/test_trcondest.cc
+++ b/test/test_trcondest.cc
@@ -154,7 +154,9 @@ void test_trcondest_work(Params& params, bool run)
         double time = barrier_get_wtime(MPI_COMM_WORLD);
         auto R  = slate::TriangularMatrix<scalar_t>(
             slate::Uplo::Upper, slate::Diag::NonUnit, A );
-        slate::trcondest(norm, R, &slate_rcond, opts);
+        slate::triangular_condest(norm, R, &slate_rcond, opts);
+        // Using traditional BLAS/LAPACK name
+        // slate::trcondest(norm, R, &slate_rcond, opts);
         time = barrier_get_wtime(MPI_COMM_WORLD) - time;
         // compute and save timing/performance
         params.time() = time;