Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup #190

Merged
merged 5 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -1363,7 +1363,7 @@ ${pkg}:

lib: ${slate} ${matgen}

clean: test/clean unit_test/clean scalapack_api/clean lapack_api/clean include/clean
clean: src/clean test/clean unit_test/clean scalapack_api/clean lapack_api/clean include/clean
rm -f lib/lib* ${dep}
rm -f trace_*.svg

Expand Down
2 changes: 1 addition & 1 deletion include/slate/TrapezoidMatrix.hh
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ TrapezoidMatrix<scalar_t> TrapezoidMatrix<scalar_t>::sub(
/// This version returns a general Matrix that:
/// - if uplo = Lower, is strictly below the diagonal, or
/// - if uplo = Upper, is strictly above the diagonal.
/// @see TrapezoidMatrix sub(int64_t i1, int64_t i2, int64_T j2)
/// @see TrapezoidMatrix sub( int64_t i1, int64_t i2, int64_t j2 )
///
/// @param[in] i1
/// Starting block row index. 0 <= i1 < mt.
Expand Down
41 changes: 23 additions & 18 deletions src/internal/Tile_geqrf.hh
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,14 @@ void geqrf(
using blas::conj;
using real_t = blas::real_type<scalar_t>;

// Constants
const scalar_t zero = 0.0;
const scalar_t one = 1.0;
const real_t r_one = 1.0;
int knt;
// LAPACK eps = 0.5 epsilon from C++
const real_t safe_min = std::numeric_limits<real_t>::min()
/ (0.5 * std::numeric_limits<real_t>::epsilon());
const real_t rsafe_min = r_one / safe_min;

Tile<scalar_t>& diag_tile = tiles.at(0);
int64_t diag_len = std::min( diag_tile.mb(), diag_tile.nb() );
Expand Down Expand Up @@ -148,21 +152,19 @@ void geqrf(
}
thread_barrier.wait(thread_size);

real_t safemin = std::numeric_limits<real_t>::epsilon();
real_t rsafemin = r_one / safemin;
// if Householder norm is numerically "zero", set to identity and exit
if (xnorm < safemin && j < diag_len) {
if (xnorm < safe_min && j < diag_len) {
betas.at(j) = alpha;
taus.at(j) = zero;
diag_tile.at(j,j) = betas.at(j);
}
else {
real_t beta =
-std::copysign(lapack::lapy3(alphr, alphi, xnorm), alphr);
knt = 0;
if (std::abs(beta) < safemin) {
if (knt < 20 && std::abs(beta) < safemin) {
knt += 1;
-std::copysign( std::hypot( alphr, alphi, xnorm ), alphr );
int cnt = 0;
if (std::abs( beta ) < safe_min) {
do {
cnt += 1;
// scale input vector accordingly
for (int64_t idx = thread_rank;
idx < int64_t(tiles.size());
Expand All @@ -173,18 +175,20 @@ void geqrf(

if (i_index == tile_indices.at(0)) {
if (j+1 < tile.mb())
blas::scal(tile.mb()-j-1,
rsafemin, &tile.at(j+1, j), 1);
blas::scal( tile.mb()-j-1, rsafe_min,
&tile.at( j+1, j ), 1 );
}
else {
blas::scal(tile.mb(), rsafemin, &tile.at(0, j), 1);
blas::scal( tile.mb(), rsafe_min,
&tile.at( 0, j ), 1 );
}
}
thread_barrier.wait(thread_size);

beta = beta*rsafemin;
alpha = alpha*rsafemin;
}
beta = beta*rsafe_min;
alpha = alpha*rsafe_min;
} while (cnt < 20 && std::abs( beta ) < safe_min);

//-------------------
// thread local norm
// with scaled vector
Expand Down Expand Up @@ -217,7 +221,8 @@ void geqrf(
thread_barrier.wait(thread_size);
alphr = real(alpha);
alphi = imag(alpha);
beta = -std::copysign(lapack::lapy3(alphr, alphi, xnorm), alphr);
beta = -std::copysign( std::hypot( alphr, alphi, xnorm ),
alphr );
}

// todo: Use overflow-safe division (see CLADIV/ZLADIV)
Expand All @@ -228,8 +233,8 @@ void geqrf(
betas.at(j) = beta;
taus.at(j) = tau;

for (int64_t i = 0; i < knt; ++i) {
beta = beta*safemin;
for (int64_t i = 0; i < cnt; ++i) {
beta = beta*safe_min;
}
//----------------------------------
// column scaling and thread local W
Expand Down
4 changes: 2 additions & 2 deletions src/internal/Tile_getrf.hh
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,8 @@ void getrf(
auto i_index = tile_indices[idx];

// column scaling
real_t sfmin = std::numeric_limits<real_t>::min();
if (cabs1(pivot[j].value()) >= sfmin) {
real_t safe_min = std::numeric_limits<real_t>::min();
if (cabs1( pivot[ j ].value() ) >= safe_min) {
// todo: make it a tile operation
if (i_index == 0) {
// diagonal tile
Expand Down
4 changes: 2 additions & 2 deletions src/internal/Tile_getrf_tntpiv.hh
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ void getrf_tntpiv_local(
auto tile = tiles[ idx ];

// column scaling
real_t sfmin = std::numeric_limits<real_t>::min();
if (cabs1( aux_pivot[ 0 ][ j ].value() ) >= sfmin) {
real_t safe_min = std::numeric_limits<real_t>::min();
if (cabs1( aux_pivot[ 0 ][ j ].value() ) >= safe_min) {
// todo: make it a tile operation
if (idx == 0) {
// diagonal tile
Expand Down
2 changes: 1 addition & 1 deletion src/internal/Tile_householder_reflection_generator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void householder_reflection_generator(
thread_barrier.wait(thread_size);

real_t beta =
-std::copysign(lapack::lapy3(alphr, alphi, xnorm), alphr);
-std::copysign( std::hypot( alphr, alphi, xnorm ), alphr );

scalar_t scal_alpha = one / (alpha-beta);
scalar_t tau = blas::make_scalar<scalar_t>(
Expand Down
8 changes: 4 additions & 4 deletions src/internal/internal_norm1est.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ void norm1est(
using blas::real;
using blas::imag;
const auto mpi_real_type = mpi_type< blas::real_type<scalar_t> >::value;
real_t safmin = std::numeric_limits< real_t >::min();
SLATE_UNUSED(safmin);
real_t safe_min = std::numeric_limits< real_t >::min();
SLATE_UNUSED( safe_min );

const scalar_t one = 1.0;
const scalar_t zero = 0.0;
Expand Down Expand Up @@ -202,7 +202,7 @@ void norm1est(
// for complex number
if constexpr (blas::is_complex<scalar_t>::value) {
real_t absx1 = std::abs( Xi_data[ii] );
if (absx1 > safmin) {
if (absx1 > safe_min) {
scalar_t Xi_ii = blas::MakeScalarTraits<scalar_t>::make(
real( Xi_data[ii] ) / absx1,
imag( Xi_data[ii] ) / absx1 );
Expand Down Expand Up @@ -301,7 +301,7 @@ void norm1est(
auto Xi_data = Xi.data();
for (int64_t ii = 0; ii < X.tileMb(i); ++ii) {
real_t absx1 = std::abs( Xi_data[ii] );
if (absx1 > safmin) {
if (absx1 > safe_min) {
scalar_t Xi_ii = blas::MakeScalarTraits<scalar_t>::make( real( Xi_data[ii] ) / absx1,
imag( Xi_data[ii] ) / absx1);
Xi_data[ii] = Xi_ii;
Expand Down
2 changes: 1 addition & 1 deletion src/stedc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ void stedc(
throw std::domain_error( "Input matrix contains Inf or NaN" );

// Scale if necessary.
// todo: steqr scales only if Anorm > sfmax or Anorm < sfmin,
// todo: steqr scales only if Anorm > safe_max or Anorm < safe_min,
// but stedc always scales. Is that right?
lapack::lascl( MatrixType::General, 0, 0, Anorm, one, n, 1, &D[0], n );
lapack::lascl( MatrixType::General, 0, 0, Anorm, one, n-1, 1, &E[0], n-1 );
Expand Down
2 changes: 1 addition & 1 deletion src/stedc_deflate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ void stedc_deflate(
// [ -s c ]
real_t s = -z[ js1 ];
real_t c = z[ js2 ];
real_t tau = lapack::lapy2( c, s ); // == sqrt( c*c + s*s )
real_t tau = std::hypot( c, s ); // == sqrt( c*c + s*s )
s /= tau;
c /= tau;
// Check off-diagonal element after applying G on both sides:
Expand Down
Loading