Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into device-regions
Browse files Browse the repository at this point in the history
  • Loading branch information
neil-lindquist committed Dec 4, 2023
2 parents 8da4956 + 68ab7f4 commit d8a3527
Show file tree
Hide file tree
Showing 74 changed files with 2,925 additions and 1,368 deletions.
2 changes: 2 additions & 0 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ ifneq ($(only_unit),1)
src/pbsv.cc \
src/pbtrf.cc \
src/pbtrs.cc \
src/pocondest.cc \
src/posv.cc \
src/posv_mixed.cc \
src/posv_mixed_gmres.cc \
Expand Down Expand Up @@ -731,6 +732,7 @@ tester_src += \
test/test_herk.cc \
test/test_hesv.cc \
test/test_pbsv.cc \
test/test_pocondest.cc \
test/test_posv.cc \
test/test_potri.cc \
test/test_scale.cc \
Expand Down
54 changes: 52 additions & 2 deletions examples/ex06_linear_system_lu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ void test_lu_mixed()
slate::gesv_mixed_gmres( A, pivots, B1, X1, iters ); // only one RHS
//---------- end mixed2

printf( "rank %d: iters %d\n", mpi_rank, iters );
if (mpi_rank == 0) {
printf( "rank %d: iters %d\n", mpi_rank, iters );
}
}

//------------------------------------------------------------------------------
Expand All @@ -94,21 +96,27 @@ void test_lu_factor()

int64_t n=1000, nrhs=100, nb=256;

//---------- begin factor1
slate::Matrix<scalar_type> A( n, n, nb, grid_p, grid_q, MPI_COMM_WORLD );
slate::Matrix<scalar_type> B( n, nrhs, nb, grid_p, grid_q, MPI_COMM_WORLD );
slate::Pivots pivots;
// ...
//---------- end factor1

A.insertLocalTiles();
B.insertLocalTiles();
random_matrix( A );
random_matrix( B );
slate::Pivots pivots;

//---------- begin factor2
// simplified API
slate::lu_factor( A, pivots );
slate::lu_solve_using_factor( A, pivots, B );

// traditional API
slate::getrf( A, pivots ); // factor
slate::getrs( A, pivots, B ); // solve
//---------- end factor2
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -140,6 +148,44 @@ void test_lu_inverse()
//---------- end inverse2
}

//------------------------------------------------------------------------------
template <typename scalar_type>
void test_lu_cond()
{
using real_t = blas::real_type<scalar_type>;

print_func( mpi_rank );

int64_t n=1000, nrhs=100, nb=256;

//---------- begin cond1
slate::Matrix<scalar_type> A( n, n, nb, grid_p, grid_q, MPI_COMM_WORLD );
slate::Pivots pivots;
// ...
//---------- end cond1

A.insertLocalTiles();
random_matrix( A );

//---------- begin cond2

// Compute A_norm before factoring.
real_t A_norm = slate::norm( slate::Norm::One, A );

// Factor using lu_factor or lu_solve.
slate::lu_factor( A, pivots );

// reciprocal condition number, 1 / (||A|| * ||A^{-1}||)
real_t A_rcond = slate::lu_rcondest_using_factor( slate::Norm::One, A, A_norm );
real_t A_cond = 1. / A_rcond;
//---------- end cond2

if (mpi_rank == 0) {
printf( "rank %d: norm %.2e, rcond %.2e, cond %.2e\n",
mpi_rank, A_norm, A_rcond, 1 / A_rcond );
}
}

//------------------------------------------------------------------------------
int main( int argc, char** argv )
{
Expand Down Expand Up @@ -173,6 +219,7 @@ int main( int argc, char** argv )
test_lu< float >();
test_lu_factor< float >();
test_lu_inverse< float >();
test_lu_cond< float >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -182,6 +229,7 @@ int main( int argc, char** argv )
test_lu_factor< double >();
test_lu_inverse< double >();
test_lu_mixed< double >();
test_lu_cond< double >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -190,6 +238,7 @@ int main( int argc, char** argv )
test_lu< std::complex<float> >();
test_lu_factor< std::complex<float> >();
test_lu_inverse< std::complex<float> >();
test_lu_cond< std::complex<float> >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -199,6 +248,7 @@ int main( int argc, char** argv )
test_lu_factor< std::complex<double> >();
test_lu_inverse< std::complex<double> >();
test_lu_mixed< std::complex<double> >();
test_lu_cond< std::complex<double> >();
}

slate_mpi_call(
Expand Down
46 changes: 45 additions & 1 deletion examples/ex07_linear_system_cholesky.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ void test_cholesky_mixed()
slate::posv_mixed_gmres( A, B1, X1, iters ); // only one RHS
//---------- end mixed2

printf( "rank %d: iters %d\n", mpi_rank, iters );
if (mpi_rank == 0) {
printf( "rank %d: iters %d\n", mpi_rank, iters );
}
}

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -138,6 +140,44 @@ void test_cholesky_inverse()
//---------- end inverse2
}

//------------------------------------------------------------------------------
template <typename scalar_type>
void test_cholesky_cond()
{
using real_t = blas::real_type<scalar_type>;

print_func( mpi_rank );

int64_t n=1000, nrhs=100, nb=256;

//---------- begin cond1
slate::HermitianMatrix<scalar_type>
A( slate::Uplo::Lower, n, nb, grid_p, grid_q, MPI_COMM_WORLD );
// ...
//---------- end cond1

A.insertLocalTiles();
random_matrix_diag_dominant( A );

//---------- begin cond2

// Compute A_norm before factoring.
real_t A_norm = slate::norm( slate::Norm::One, A );

// Factor using chol_factor or chol_solve.
slate::chol_factor( A );

// reciprocal condition number, 1 / (||A|| * ||A^{-1}||)
real_t A_rcond = slate::chol_rcondest_using_factor( slate::Norm::One, A, A_norm );
real_t A_cond = 1. / A_rcond;
//---------- end cond2

if (mpi_rank == 0) {
printf( "rank %d: norm %.2e, rcond %.2e, cond %.2e\n",
mpi_rank, A_norm, A_rcond, 1 / A_rcond );
}
}

//------------------------------------------------------------------------------
int main( int argc, char** argv )
{
Expand Down Expand Up @@ -171,6 +211,7 @@ int main( int argc, char** argv )
test_cholesky< float >();
test_cholesky_factor< float >();
test_cholesky_inverse< float >();
test_cholesky_cond< float >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -180,6 +221,7 @@ int main( int argc, char** argv )
test_cholesky_factor< double >();
test_cholesky_inverse< double >();
test_cholesky_mixed< double >();
test_cholesky_cond< double >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -188,6 +230,7 @@ int main( int argc, char** argv )
test_cholesky< std::complex<float> >();
test_cholesky_factor< std::complex<float> >();
test_cholesky_inverse< std::complex<float> >();
test_cholesky_cond< std::complex<float> >();
}
if (mpi_rank == 0)
printf( "\n" );
Expand All @@ -197,6 +240,7 @@ int main( int argc, char** argv )
test_cholesky_factor< std::complex<double> >();
test_cholesky_inverse< std::complex<double> >();
test_cholesky_mixed< std::complex<double> >();
test_cholesky_cond< std::complex<double> >();
}

slate_mpi_call(
Expand Down
6 changes: 3 additions & 3 deletions include/slate/BaseMatrix.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2535,6 +2535,9 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
// TODO Consider inter-Queue dependencies instead of disabling async
async &= !(dst_device != HostNum && src_device != HostNum);

if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
storage_->tileMakeTransposable( dst_tile );
}

if (is_square || ! need_convert) {
lapack::Queue* queue = comm_queue( work_device );
Expand All @@ -2546,9 +2549,6 @@ void BaseMatrix<scalar_t>::tileCopyDataLayout(Tile<scalar_t>* src_tile,
}
}
else {
if (dst_tile->layout() != target_layout && ! dst_tile->isTransposable()) {
storage_->tileMakeTransposable( dst_tile );
}
dst_tile->setLayout( target_layout );

scalar_t* work_data = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion include/slate/enums.hh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ enum class TileReleaseStrategy : char {
None = 'N', ///< tiles are not release at all
Internal = 'I', ///< tiles are released by routines in slate::internal namespace
Slate = 'S', ///< tiles are released by routines directly in slate namespace
All = 'A', ///< tiles are released by rotines in all namespaces
All = 'A', ///< tiles are released by routines in all namespaces
};

namespace internal {
Expand Down
62 changes: 52 additions & 10 deletions include/slate/simplified_api.hh
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,20 @@ void lu_inverse_using_factor_out_of_place(
getri(A, pivots, A_inverse, opts);
}

//-----------------------------------------
// lu_condest_using_factor()

// gecondest
template <typename scalar_t>
blas::real_type<scalar_t> lu_rcondest_using_factor(
Norm in_norm,
Matrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
Options const& opts = Options())
{
return gecondest( in_norm, A, Anorm, opts );
}

//-----------------------------------------
// Cholesky

Expand All @@ -376,56 +390,56 @@ void chol_solve(

// posv
template <typename scalar_t>
void chol_solve(
int64_t chol_solve(
HermitianMatrix<scalar_t>& A,
Matrix<scalar_t>& B,
Options const& opts = Options())
{
posv(A, B, opts);
return posv( A, B, opts );
}

// forward real-symmetric matrices to posv;
// disabled for complex
template <typename scalar_t>
void chol_solve(
int64_t chol_solve(
SymmetricMatrix<scalar_t>& A,
Matrix<scalar_t>& B,
Options const& opts = Options(),
enable_if_t< ! is_complex<scalar_t>::value >* = nullptr)
{
posv(A, B, opts);
return posv( A, B, opts );
}

//-----------------------------------------
// chol_factor()

// pbtrf
template <typename scalar_t>
void chol_factor(
int64_t chol_factor(
HermitianBandMatrix<scalar_t>& A,
Options const& opts = Options())
{
pbtrf(A, opts);
return pbtrf( A, opts );
}

// potrf
template <typename scalar_t>
void chol_factor(
int64_t chol_factor(
HermitianMatrix<scalar_t>& A,
Options const& opts = Options())
{
potrf(A, opts);
return potrf( A, opts );
}

// forward real-symmetric matrices to potrf;
// disabled for complex
template <typename scalar_t>
void chol_factor(
int64_t chol_factor(
SymmetricMatrix<scalar_t>& A,
Options const& opts = Options(),
enable_if_t< ! is_complex<scalar_t>::value >* = nullptr)
{
potrf(A, opts);
return potrf( A, opts );
}

//-----------------------------------------
Expand Down Expand Up @@ -475,6 +489,20 @@ void chol_inverse_using_factor(
potri(A, opts);
}

//-----------------------------------------
// chol_condest_using_factor()

// pocondest
template <typename scalar_t>
blas::real_type<scalar_t> chol_rcondest_using_factor(
Norm in_norm,
HermitianMatrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
Options const& opts = Options())
{
return pocondest( in_norm, A, Anorm, opts );
}

//-----------------------------------------
// Symmetric indefinite -- block Aasen's

Expand Down Expand Up @@ -649,6 +677,20 @@ void lq_multiply_by_q(
unmlq(side, op, A, T, C, opts);
}

//-----------------------------------------
// triangular_condest()

// trcondest
template <typename scalar_t>
blas::real_type<scalar_t> triangular_rcondest(
Norm in_norm,
TriangularMatrix<scalar_t>& A,
blas::real_type<scalar_t> Anorm,
Options const& opts = Options())
{
return trcondest( in_norm, A, Anorm, opts );
}

//------------------------------------------------------------------------------
// Symmetric/Hermitian Eigenvalues

Expand Down
Loading

0 comments on commit d8a3527

Please sign in to comment.