Skip to content

Commit

Permalink
Merge pull request #141 from neil-lindquist/pocondest
Browse files Browse the repository at this point in the history
Add pocondest routine and add condition estimators to simplified API
  • Loading branch information
mgates3 authored Nov 8, 2023
2 parents f1c8490 + 3c88cda commit 4323f4b
Show file tree
Hide file tree
Showing 13 changed files with 679 additions and 16 deletions.
2 changes: 2 additions & 0 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ ifneq ($(only_unit),1)
src/pbsv.cc \
src/pbtrf.cc \
src/pbtrs.cc \
src/pocondest.cc \
src/posv.cc \
src/posv_mixed.cc \
src/posv_mixed_gmres.cc \
Expand Down Expand Up @@ -731,6 +732,7 @@ tester_src += \
test/test_herk.cc \
test/test_hesv.cc \
test/test_pbsv.cc \
test/test_pocondest.cc \
test/test_posv.cc \
test/test_potri.cc \
test/test_scale.cc \
Expand Down
6 changes: 3 additions & 3 deletions include/slate/BaseMatrix.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2535,6 +2535,9 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
// TODO Consider inter-Queue dependencies instead of disabling async
async &= !(dst_device != HostNum && src_device != HostNum);

if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
storage_->tileMakeTransposable( dst_tile );
}

if (is_square || ! need_convert) {
lapack::Queue* queue = comm_queue( work_device );
Expand All @@ -2546,9 +2549,6 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
}
}
else {
if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
storage_->tileMakeTransposable( dst_tile );
}
dst_tile->setLayout( target_layout );

scalar_t* work_data = nullptr;
Expand Down
45 changes: 45 additions & 0 deletions include/slate/simplified_api.hh
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,21 @@ void lu_inverse_using_factor_out_of_place(
getri(A, pivots, A_inverse, opts);
}

//-----------------------------------------
// lu_condest_using_factor()

// gecondest
template <typename scalar_t>
void lu_condest_using_factor(
Norm norm,
Matrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options())
{
gecondest(norm, A, Anorm, rcond, opts);
}

//-----------------------------------------
// Cholesky

Expand Down Expand Up @@ -475,6 +490,21 @@ void chol_inverse_using_factor(
potri(A, opts);
}

//-----------------------------------------
// chol_condest_using_factor()

// pocondest
template <typename scalar_t>
void chol_condest_using_factor(
Norm norm,
HermitianMatrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options())
{
pocondest(norm, A, Anorm, rcond, opts);
}

//-----------------------------------------
// Symmetric indefinite -- block Aasen's

Expand Down Expand Up @@ -649,6 +679,21 @@ void lq_multiply_by_q(
unmlq(side, op, A, T, C, opts);
}

//-----------------------------------------
// triangular_condest()

// trcondest
template <typename scalar_t>
void triangular_condest(
Norm norm,
TriangularMatrix<scalar_t>& A,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options())
{
trcondest(norm, A, rcond, opts);
}


//------------------------------------------------------------------------------
// Symmetric/Hermitian Eigenvalues

Expand Down
22 changes: 22 additions & 0 deletions include/slate/slate.hh
Original file line number Diff line number Diff line change
Expand Up @@ -1345,11 +1345,33 @@ void steqr2(
//-----------------------------------------
// gecondest()
template <typename scalar_t>
void gecondest(
Norm in_norm,
Matrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options());

template <typename scalar_t>
[[deprecated( "Pass Anorm by value instead. Will be removed 2024-11." )]]
void gecondest(
Norm in_norm,
Matrix<scalar_t>& A,
blas::real_type<scalar_t> *Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options())
{
gecondest( in_norm, A, *Anorm, rcond, opts );
}

//-----------------------------------------
// pocondest()
template <typename scalar_t>
void pocondest(
Norm in_norm,
HermitianMatrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts = Options());

//-----------------------------------------
Expand Down
14 changes: 7 additions & 7 deletions src/gecondest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ template <typename scalar_t>
void gecondest(
Norm in_norm,
Matrix<scalar_t>& A,
blas::real_type<scalar_t> *Anorm,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts)
{
Expand All @@ -83,7 +83,7 @@ void gecondest(
*rcond = 1.;
return;
}
else if (*Anorm == 0.) {
else if (Anorm == 0.) {
return;
}

Expand Down Expand Up @@ -144,7 +144,7 @@ void gecondest(

// Compute the estimate of the reciprocal condition number.
if (Ainvnorm != 0.0) {
*rcond = (1.0 / Ainvnorm) / *Anorm;
*rcond = (1.0 / Ainvnorm) / Anorm;
}
}

Expand All @@ -154,31 +154,31 @@ template
void gecondest<float>(
Norm in_norm,
Matrix<float>& A,
float *Anorm,
float Anorm,
float *rcond,
Options const& opts);

template
void gecondest<double>(
Norm in_norm,
Matrix<double>& A,
double *Anorm,
double Anorm,
double *rcond,
Options const& opts);

template
void gecondest< std::complex<float> >(
Norm in_norm,
Matrix< std::complex<float> >& A,
float *Anorm,
float Anorm,
float *rcond,
Options const& opts);

template
void gecondest< std::complex<double> >(
Norm in_norm,
Matrix< std::complex<double> >& A,
double *Anorm,
double Anorm,
double *rcond,
Options const& opts);

Expand Down
160 changes: 160 additions & 0 deletions src/pocondest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright (c) 2017-2022, University of Tennessee. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause
// This program is free software: you can redistribute it and/or modify it under
// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.

#include "slate/slate.hh"
#include "auxiliary/Debug.hh"
#include "slate/Matrix.hh"
#include "slate/HermitianMatrix.hh"
#include "internal/internal.hh"

namespace slate {

//------------------------------------------------------------------------------
/// Distributed parallel estimate of the reciprocal of the condition number
/// of a general matrix A, in either the 1-norm or the infinity-norm.
/// Generic implementation for any target.
///
/// The reciprocal of the condition number computed as:
/// \[
/// rcond = \frac{1}{\|\|A\|\| \times \|\|A^{-1}\|\-}
/// \]
/// where $A$ is the output of the Cholesky factorization (potrf).
///
//------------------------------------------------------------------------------
/// @tparam scalar_t
/// One of float, double, std::complex<float>, std::complex<double>.
//------------------------------------------------------------------------------
/// @param[in] in_norm
/// rcond to compute:
/// - Norm::One: 1-norm condition number
/// - Norm::Inf: infinity-norm condition number
///
/// @param[in] A
/// On entry, the n-by-n matrix $A$.
/// It is the output of the Cholesky factorization of a Hermitian matrix.
///
/// @param[in] Anorm
/// If Norm::One, the 1-norm of the original matrix A.
/// If Norm::Inf, the infinity-norm of the original matrix A.
///
/// @param[in,out] rcond
/// The reciprocal of the condition number of the matrix A,
/// computed as stated above.
///
/// @param[in] opts
/// Additional options, as map of name = value pairs. Possible options:
/// - Option::Target:
/// Implementation to target. Possible values:
/// - HostTask: OpenMP tasks on CPU host [default].
/// - HostNest: nested OpenMP parallel for loop on CPU host.
/// - HostBatch: batched BLAS on CPU host.
/// - Devices: batched BLAS on GPU device.
///
/// @ingroup cond_specialization
///
template <typename scalar_t>
void pocondest(
Norm in_norm,
HermitianMatrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
blas::real_type<scalar_t> *rcond,
Options const& opts)
{
using blas::real;
using real_t = blas::real_type<scalar_t>;

int kase;
if (in_norm != Norm::One && in_norm != Norm::Inf) {
slate_error("invalid norm.");
}

int64_t m = A.m();

// Quick return
*rcond = 0.;
if (m <= 1) {
*rcond = 1.;
return;
}
else if (Anorm == 0.) {
return;
}

real_t Ainvnorm = 0.0;

std::vector<int64_t> isave = {0, 0, 0, 0};

auto tileMb = A.tileMbFunc();
auto tileNb = func::uniform_blocksize(1, 1);
auto tileRank = A.tileRankFunc();
auto tileDevice = A.tileDeviceFunc();
slate::Matrix<scalar_t> X (m, 1, tileMb, tileNb,
tileRank, tileDevice, A.mpiComm());
X.insertLocalTiles(Target::Host);
slate::Matrix<scalar_t> V (m, 1, tileMb, tileNb,
tileRank, tileDevice, A.mpiComm());
V.insertLocalTiles(Target::Host);
slate::Matrix<int64_t> isgn (m, 1, tileMb, tileNb,
tileRank, tileDevice, A.mpiComm());
isgn.insertLocalTiles(Target::Host);

// initial and final value of kase is 0
kase = 0;
internal::norm1est( X, V, isgn, &Ainvnorm, &kase, isave, opts);

MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );

while (kase != 0)
{
// A is symmetric, so both cases are equivalent
potrs( A, X, opts );

internal::norm1est( X, V, isgn, &Ainvnorm, &kase, isave, opts);
MPI_Bcast( &isave[0], 4, MPI_INT64_T, X.tileRank(0, 0), A.mpiComm() );
MPI_Bcast( &kase, 1, MPI_INT, X.tileRank(0, 0), A.mpiComm() );
} // while (kase != 0)

// Compute the estimate of the reciprocal condition number.
if (Ainvnorm != 0.0) {
*rcond = (1.0 / Ainvnorm) / Anorm;
}
}

//------------------------------------------------------------------------------
// Explicit instantiations.
template
void pocondest<float>(
Norm in_norm,
HermitianMatrix<float>& A,
float Anorm,
float *rcond,
Options const& opts);

template
void pocondest<double>(
Norm in_norm,
HermitianMatrix<double>& A,
double Anorm,
double *rcond,
Options const& opts);

template
void pocondest< std::complex<float> >(
Norm in_norm,
HermitianMatrix< std::complex<float> >& A,
float Anorm,
float *rcond,
Options const& opts);

template
void pocondest< std::complex<double> >(
Norm in_norm,
HermitianMatrix< std::complex<double> >& A,
double Anorm,
double *rcond,
Options const& opts);

} // namespace slate
2 changes: 1 addition & 1 deletion test/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,12 +614,12 @@ def filter_csv( values, csv ):
if (opts.cond):
cmds += [
[ 'gecondest', gen + dtype + n ],
[ 'pocondest', gen + dtype + n + uplo ],

# Triangle
[ 'trcondest', gen + dtype + n ],

#[ 'gbcon', gen + dtype + la + n + kl + ku ],
#[ 'pocon', gen + dtype + la + n + uplo ],
#[ 'pbcon', gen + dtype + la + n + kd + uplo ],
]

Expand Down
Loading

0 comments on commit 4323f4b

Please sign in to comment.